This document outlines a functional principal component analysis (FPCA) of the dynamic changes in the PC1 scores obtained from the PCA of the tongue-spline data.

1 Preliminaries

1.1 loading packages and machine settings

library(fdapace)   # functional PCA (FPCA, MakeFPCAInputs, CheckData)
library(ggplot2)
library(tidyverse) # dplyr/tidyr/stringr pipelines used throughout
library(brms)      # Bayesian regression (loaded for later modelling)
library(scales)
library(grid)      # unit() used in theme settings
library(gridExtra) # grid.arrange() for multi-panel figures
library(ggpubr)    # ggarrange() for combined publication plots
library(ggsci)
library(emuR)
library(emmeans)
theme_set(theme_classic()) # classic theme as the default for all ggplots
options(mc.cores = parallel::detectCores()) # use all cores (e.g. for brms sampling)

1.2 loading data

# Tongue spline tracking data (loads object `int.350`)
load(file = "data/int.350.rda")

# Participant info (loads object `par`)
load(file = "data/par.rda")

1.3 participant info

# select participants that are included in the analysis
sp <- int.350 %>% 
  select(speaker)

# attach participant info to each speaker
# NOTE(review): `by.x` is given without `by.y`; merge() then falls back on the
# common column(s) for `by.y` — here that is "speaker", but confirm this is intended
sp <- merge(sp, par, by.x = "speaker")

# N of speakers by L1
sp %>% 
  group_by(L1) %>% 
  summarise(n = n_distinct(speaker)) %>% 
  ungroup()
## # A tibble: 3 Ă— 2
##   L1                  n
##   <chr>           <int>
## 1 English            13
## 2 Japanese           29
## 3 Polish, English     1
# Country of origin per L1 group (distinct speakers)
sp %>% 
  group_by(L1, country) %>% 
  summarise(n = n_distinct(speaker)) %>% 
  ungroup()
## `summarise()` has grouped output by 'L1'. You can override using the `.groups`
## argument.
## # A tibble: 4 Ă— 3
##   L1              country     n
##   <chr>           <chr>   <int>
## 1 English         Canada      4
## 2 English         US          9
## 3 Japanese        Japan      29
## 4 Polish, English Poland      1
# fluency rating
sp %>% 
  mutate(
    # collapse "Polish, English" into the English group: anyone whose
    # L1 is not Japanese is treated as primarily English-speaking
    primary_lang = case_when(
      L1 == "Japanese" ~ "Japanese",
      TRUE ~ "English"
    )
  ) %>% 
  group_by(primary_lang) %>% 
  summarise(
    # means and sds of the fluency / use / familiarity ratings per group
    mean_fluency = mean(fluency),
    sd_fluency = sd(fluency),
    mean_use = mean(use),
    sd_use = sd(use),
    mean_familiarity = mean(familiarity),
    sd_familiarity = sd(familiarity)
  ) %>% 
  ungroup()
## # A tibble: 2 Ă— 7
##   primary_lang mean_fluency sd_fluency mean_use sd_use mean_familiarity
##   <chr>               <dbl>      <dbl>    <dbl>  <dbl>            <dbl>
## 1 English              7          0        6.57  0.880             7   
## 2 Japanese             3.71       1.04     3.66  1.08              3.87
## # ℹ 1 more variable: sd_familiarity <dbl>
## Japanese data only
sp.jp <- sp %>% 
  filter(L1 == "Japanese")

## English study: years of study and months spent overseas
## (original column name encodes the unit: months, with 1 week = 0.25 months)
sp.jp %>% 
  rename(
    overseas = `overseas (month: 1wk = 0.25m)`
  ) %>% 
  summarise(mean_study = mean(as.numeric(English_study)),
            sd_study = sd(as.numeric(English_study)),
            mean_month_overseas = mean(as.numeric(overseas)),
            sd_month_overseas = sd(as.numeric(overseas)))
##   mean_study sd_study mean_month_overseas sd_month_overseas
## 1   8.822862 2.064805           0.6463423          1.235134

1.4 some descriptive statistics

# matching tongue data with participant info
# (both frames carry an L1 column; keep the one from int.350 — L1.x — and
#  drop the duplicate L1.y plus the unused `number` column)
int.350 <- merge(int.350, par, by.x = "speaker", by.y = "speaker") %>% 
  select(-number, -L1.y) %>% 
  rename(
    L1 = L1.x
  )

# N of speaker = 43; mean/sd age per L1 group
int.350 %>% 
  group_by(L1) %>% 
  summarise(speaker = n_distinct(speaker),
            mean_age = mean(age),
            sd_age = sd(age)) %>% 
  ungroup()
## # A tibble: 2 Ă— 4
##   L1       speaker mean_age sd_age
##   <chr>      <int>    <dbl>  <dbl>
## 1 English       14     29.1  6.25 
## 2 Japanese      29     19.6  0.941
# N of prompts: distinct tokens (exclude_key) per L1 x segment x adjacent vowel
int.350 %>% 
  group_by(L1, segment, vowel) %>% 
  summarise(n = n_distinct(exclude_key)) %>% 
  ungroup()
## `summarise()` has grouped output by 'L1', 'segment'. You can override using the
## `.groups` argument.
## # A tibble: 15 Ă— 4
##    L1       segment vowel     n
##    <chr>    <chr>   <chr> <int>
##  1 English  /l/     /a/     193
##  2 English  /l/     /i/     193
##  3 English  /l/     /u/     130
##  4 English  /Éą/     /a/     192
##  5 English  /Éą/     /i/     194
##  6 English  /Éą/     /u/     130
##  7 Japanese /l/     /a/     295
##  8 Japanese /l/     /i/     301
##  9 Japanese /l/     /u/     197
## 10 Japanese /Éą/     /a/     305
## 11 Japanese /Éą/     /i/     301
## 12 Japanese /Éą/     /u/     199
## 13 Japanese /Éľ/     /a/     180
## 14 Japanese /Éľ/     /i/      90
## 15 Japanese /Éľ/     /u/     175
# distinct tokens per L1 x segment (collapsed over vowel)
int.350 %>% 
  group_by(L1, segment) %>% 
  summarise(n = n_distinct(exclude_key)) %>% 
  ungroup()
## `summarise()` has grouped output by 'L1'. You can override using the `.groups`
## argument.
## # A tibble: 5 Ă— 3
##   L1       segment     n
##   <chr>    <chr>   <int>
## 1 English  /l/       516
## 2 English  /Éą/       516
## 3 Japanese /l/       793
## 4 Japanese /Éą/       805
## 5 Japanese /Éľ/       445

2 analysis 1: principal component analysis

2.1 running PCA

# PCA: -350ms onset ----------------------------------------------------------------
## Data Preparation
int.350 <- int.350 %>% 
  select(speaker:country)

### Convert data frame into a PCA-friendly format
# z-score X and Y within each speaker so coordinates are comparable across
# speakers, then spread the spline points wide (one X_z_*/Y_z_* column per
# point) so that each row represents a single tongue contour
int.350.xy <- int.350 %>%
  group_by(speaker) %>%
  mutate(
    X_z = scale(X),
    Y_z = scale(Y)
  ) %>%
  ungroup() %>%
  dplyr::select(-X, -Y) %>%
  pivot_wider(
    names_from = point_number,
    values_from = c(X_z, Y_z)
  )

### Check 1
# int.350.xy %>%
#   filter(speaker == "3wy8us",
#          prompt == "ram",
#          repetition == "1") %>% 
#   group_by(speaker, prompt, repetition, time, interval_350, phone) %>%
#   summarise() %>% 
#   ungroup() %>% 
#   print(n = Inf)

### Check 2
# int.350.xy %>%
#   filter(speaker == "2d57ke") %>% 
#   group_by(speaker, prompt, repetition) %>%
#   summarise() %>% 
#   ungroup() %>% 
#   print(n = Inf)

### Check 3
# int.350.xy %>% 
#   group_by(speaker) %>% 
#   summarise() %>% 
#   ungroup() %>% 
#   print(n = Inf)

### check column names
# colnames(int.350.xy)

### Remove and save meta data 
## Meta columns (identify/describe each contour but are not PCA inputs).
## Declared once so the drop-list and the keep-list below cannot drift apart
## (the original repeated the same 25 names in two select() calls).
meta.cols.350 <- c("speaker", "rec_date", "time", "prompt", "L1",
                   "frame_number", "spline_number", "repetition", "segment",
                   "phone", "position", "interval_350", "vowel", "exclude_key",
                   "start_time", "end_time", "total_duration",
                   "proportional_time", "vowel_start", "vowel_start_prop",
                   "acoustic_start", "acoustic_start_prop", "gender", "age",
                   "country")

# numeric spline coordinates only: the input to PCA
int.350.pca <- int.350.xy %>% 
  dplyr::select(-dplyr::all_of(meta.cols.350))
# remove the meta data that we don't need for PCA

# matching meta data, kept aside to re-attach to the PCA scores later
meta.350 <- int.350.xy %>% 
  dplyr::select(dplyr::all_of(meta.cols.350))
# separate and save the meta data for later

### Check if there are any NAs 

table(is.na(int.350.pca)) # all FALSE -> no missing values in the PCA input
## 
##   FALSE 
## 3556476
# int.350.pca <- drop_na(int.350.pca) # run this only when the table(is.na()) returns TRUE values

### Run PCA on all the liquid tokens
# NOTE(review): princomp() uses the covariance/eigen method (divisor N);
# stats::prcomp() (SVD, divisor N-1) is usually preferred numerically, but
# switching would slightly change the reported loadings/scores below
pca.350 <- princomp(int.350.pca)

summary(pca.350) 
## Importance of components:
##                           Comp.1    Comp.2     Comp.3     Comp.4     Comp.5
## Standard deviation     0.6801958 0.6005386 0.34153349 0.28791377 0.24685574
## Proportion of Variance 0.3927849 0.3061743 0.09902695 0.07037391 0.05173367
## Cumulative Proportion  0.3927849 0.6989591 0.79798608 0.86836000 0.92009367
##                            Comp.6     Comp.7     Comp.8     Comp.9     Comp.10
## Standard deviation     0.17905382 0.16737926 0.11178715 0.08286500 0.064901130
## Proportion of Variance 0.02721786 0.02378429 0.01060891 0.00582947 0.003575949
## Cumulative Proportion  0.94731153 0.97109582 0.98170472 0.98753419 0.991110142
##                            Comp.11     Comp.12      Comp.13      Comp.14
## Standard deviation     0.057522053 0.040252630 0.0338309405 0.0298843284
## Proportion of Variance 0.002809025 0.001375547 0.0009716615 0.0007581826
## Cumulative Proportion  0.993919167 0.995294714 0.9962663753 0.9970245580
##                             Comp.15      Comp.16      Comp.17      Comp.18
## Standard deviation     0.0280492532 0.0251609230 0.0237700494 0.0211586285
## Proportion of Variance 0.0006679277 0.0005374524 0.0004796749 0.0003800685
## Cumulative Proportion  0.9976924856 0.9982299380 0.9987096129 0.9990896814
##                            Comp.19      Comp.20      Comp.21      Comp.22
## Standard deviation     0.018903311 0.0177535200 0.0158079519 0.0122418144
## Proportion of Variance 0.000303363 0.0002675813 0.0002121476 0.0001272267
## Cumulative Proportion  0.999393044 0.9996606257 0.9998727733 1.0000000000
# Summarise to see how much variation each PCA accounts for

2.2 variation explained by each PC

### Plotting the variance explained (optional)
# proportion of the total variance accounted for by each component
var.explained.350 <- pca.350$sdev^2 / sum(pca.350$sdev^2)

# convert to a tibble (column `value`) and add a PC index column
# (the original converted with as_tibble() twice; once is sufficient)
var.explained.350 <- var.explained.350 %>% 
  as_tibble() %>% 
  mutate(
    PC = row_number()
  )

# create a plot
var.explained.350.PC10 <- var.explained.350 %>% 
  filter(PC < 11) # only plot PC10 or below 

# scree-style plot: labelled values for PC1-PC4, plain points for PC6-PC10,
# dotted line marks the 5% variance threshold
var.plot.350 <- var.explained.350.PC10 %>%
  ggplot(mapping = aes(x = PC, y = value)) +
  geom_line() +
  geom_text(data = subset(var.explained.350.PC10, PC < 5), aes(label = round(value, digits = 5)), nudge_x = 0.8) +
  geom_label(data = subset(var.explained.350.PC10, PC < 5), aes(label = PC), label.padding = unit(0.40, "lines")) +
  # NOTE(review): `PC > 5` skips the point for PC5 itself (PC1-4 get labels,
  # PC6-10 get points, PC5 only the line) — confirm this is intended
  geom_point(data = subset(var.explained.350.PC10, PC > 5)) +
  geom_hline(yintercept = 0.05, linetype = 'dotted') +
  xlab("Principal Component") +
  ylab("Variance Explained") +
  ggtitle("Proportion of Variance explained by each PC") +
  # ylim(0, 0.6) +
  theme_classic() +
  theme(plot.title = element_text(size = 18, face = "bold"), 
        axis.text = element_text(size = 12),
        axis.title = element_text(size = 15),
        strip.text.x = element_text(size = 15),
        strip.text.y = element_text(size = 15, angle = 0),
        legend.text = element_text(size = 12),
        legend.position = "bottom",
        legend.key.width = unit(3, "cm")
        # legend.title = element_blank()
  )

var.plot.350

# the plot shows that the first 4 PCs account for more than 5% of the variation in the data

ggsave(var.plot.350, filename = "figure/varplot_350ms.png", width = 10, height = 5, dpi = 1000)

2.3 preparing for PCA plot

## Preparing PCA plots
# Get the results of the PCA which are useful (scores: one row per contour)
pca.number.350 <- pca.350$scores

# Put it into a sensible format as some variables come out weird
pca.number.350 <- as_tibble(pca.number.350)

# Combine with non-numeric information from earlier (rows are in the same
# order as int.350.xy, so a plain cbind lines up correctly)
pca.result.350 <- cbind(meta.350, pca.number.350)

# normalise PCs by speaker for comparison
# NOTE(review): the result stays grouped by speaker (no ungroup() here);
# later code calls ungroup() explicitly where it matters
pca.result.350 = pca.result.350 %>% 
  group_by(speaker) %>% 
  mutate(
    PC1z = scale(Comp.1),
    PC2z = scale(Comp.2),
    PC3z = scale(Comp.3),
    PC4z = scale(Comp.4)
  )

## Work out parameters of variation in the first 4 PCs
# Mean values from the output of the PCA (the column means used for centring)
mean.pca.350 <- tibble::enframe(pca.350$center)

## make subsettable variable
# first character of the variable name ("X..." / "Y...") encodes the axis
mean.pca.350 <- mean.pca.350 %>% 
  mutate(axis = substr(mean.pca.350$name, 1, 1))

## subset data to make into a matrix of x and y values
# one row per spline point, with the mean x and mean y side by side
X <- subset(mean.pca.350, mean.pca.350$axis == 'X')
Y <- subset(mean.pca.350, mean.pca.350$axis == 'Y')
mean.pca.350 <- cbind(X, Y)

## changing colnames
colnames(mean.pca.350) = c("number1", "mean.x", "axis1", "number2", "mean.y", "axis2")

## get loadings - eigenvectors
loadings.350 <- as.table(pca.350$loadings)

# PC1-PC4 loadings --------------------------------------------------------
## helper: extract the x/y loadings of one principal component in a sensible
## two-column format (one row per spline point). Replaces four copy-pasted
## blocks from the original (whose PC2 block was even mislabelled "PC1").
##
## loadings_tab: loadings as a table — Var1 = coordinate name (e.g. "X_z_1"),
##               Var2 = component name (e.g. "Comp.1"), Freq = loading value
## comp:         which component to extract, e.g. "Comp.1"
## prefix:       prefix for the output columns, e.g. "PC1.l.350" gives
##               columns "PC1.l.350.x" and "PC1.l.350.y"
extract.loadings <- function(loadings_tab, comp, prefix) {
  l <- as.data.frame(loadings_tab)
  l <- l[l$Var2 == comp, ]
  # first character of the variable name encodes the axis (X or Y)
  l$axis <- substr(l$Var1, 1, 1)
  x <- l[l$axis == "X", ]
  y <- l[l$axis == "Y", ]
  out <- data.frame(x$Freq, y$Freq)
  colnames(out) <- paste0(prefix, c(".x", ".y"))
  out
}

## loadings for PC1-PC4, one data frame each, as before
PC1.l.350 <- extract.loadings(loadings.350, "Comp.1", "PC1.l.350")
PC2.l.350 <- extract.loadings(loadings.350, "Comp.2", "PC2.l.350")
PC3.l.350 <- extract.loadings(loadings.350, "Comp.3", "PC3.l.350")
PC4.l.350 <- extract.loadings(loadings.350, "Comp.4", "PC4.l.350")

## Plotting the meaning of PCs
# bind together all of the above
# NOTE: this overwrites the loadings table created earlier with a
# point-by-point data frame of the PC1-PC4 x/y loadings
loadings.350 <- cbind(PC1.l.350, PC2.l.350, PC3.l.350, PC4.l.350)

# get sds of first 4 PCs (sdev of each component, in component order)
sd.350 <- tibble::enframe(pca.350$sdev)
sd_PC1.350 <- as.numeric(sd.350[1,2])
sd_PC2.350 <- as.numeric(sd.350[2,2])
sd_PC3.350 <- as.numeric(sd.350[3,2])
sd_PC4.350 <- as.numeric(sd.350[4,2])

# calculate estimated values including sd
# midpoint model: for each PC k, the tongue shape at the mean plus/minus one
# sd along that PC's loading direction. The original spelled out all 16
# columns by hand; a loop over k = 1:4 produces the same columns in the
# same order (PCk.max.x, PCk.min.x, PCk.max.y, PCk.min.y).
estimate.350 <- cbind(mean.pca.350, loadings.350)

pc.sds.350 <- c(sd_PC1.350, sd_PC2.350, sd_PC3.350, sd_PC4.350)
for (k in 1:4) {
  sd.k   <- pc.sds.350[k]
  load.x <- estimate.350[[paste0("PC", k, ".l.350.x")]]
  load.y <- estimate.350[[paste0("PC", k, ".l.350.y")]]
  estimate.350[[paste0("PC", k, ".max.x")]] <- estimate.350$mean.x + sd.k * load.x
  estimate.350[[paste0("PC", k, ".min.x")]] <- estimate.350$mean.x - sd.k * load.x
  estimate.350[[paste0("PC", k, ".max.y")]] <- estimate.350$mean.y + sd.k * load.y
  estimate.350[[paste0("PC", k, ".min.y")]] <- estimate.350$mean.y - sd.k * load.y
}

2.4 plotting PCA

# Make figures ------------------------------------------------------------
## helper: plot the mean tongue shape (solid) plus the shapes at +1 sd
## (dashed path, "+" points) and -1 sd (dotted path, "−" points) along one
## PC. `pc` is the column prefix and plot title: "PC1" ... "PC4".
## Replaces four copy-pasted plot blocks; the rendered plots are identical.
pc.shape.plot.350 <- function(pc) {
  max.x <- paste0(pc, ".max.x"); max.y <- paste0(pc, ".max.y")
  min.x <- paste0(pc, ".min.x"); min.y <- paste0(pc, ".min.y")
  ggplot(estimate.350) +
    geom_path(aes(x = mean.x, y = mean.y), linewidth = 1.5) +
    geom_path(aes(x = .data[[max.x]], y = .data[[max.y]]), linewidth = 1, alpha = 0.5, linetype = "dashed") +
    geom_path(aes(x = .data[[min.x]], y = .data[[min.y]]), linewidth = 1, alpha = 0.5, linetype = "dotted") +
    geom_point(aes(x = .data[[max.x]], y = .data[[max.y]]), shape = 3, size = 3, stroke = 2) +
    geom_point(aes(x = .data[[min.x]], y = .data[[min.y]]), shape = "\u2212", size = 5, stroke = 8) +
    xlab("X") + ylab("Y") +
    ggtitle(pc) +
    theme_classic() +
    theme(plot.title = element_text(size = 15, hjust = 0.5, vjust = 1.5, face = "bold"),
          legend.position = "top",
          legend.title = element_blank(),
          axis.text = element_text(size = 15),
          axis.title = element_text(size = 15),
          strip.text.x = element_text(size = 15)
    )
}

PC1.350.plot <- pc.shape.plot.350("PC1")
PC2.350.plot <- pc.shape.plot.350("PC2")
PC3.350.plot <- pc.shape.plot.350("PC3")
PC4.350.plot <- pc.shape.plot.350("PC4")

# Publication plot: 2 x 2 grid of the four PC shape plots
pca_meaning_all.350 <- grid.arrange(PC1.350.plot, PC2.350.plot, PC3.350.plot, PC4.350.plot, ncol = 2)

3 analysis 2: functional principal component analysis

3.1 FPCA using fdapace

# IDs = token column; tVec = time column; yVec = variable column(s)
# build the FPCA input: one PC1z trajectory (over proportional time) per token
input.PC1 <- fdapace::MakeFPCAInputs(IDs = pca.result.350$exclude_key, tVec = pca.result.350$proportional_time, yVec = pca.result.350$PC1z)

# Check if there are any issues with the data
fdapace::CheckData(input.PC1$Ly, input.PC1$Lt)

# No errors have been returned, so let's now run fPCA on the dynamic PC1 trajectory
PC1 <- fdapace::FPCA(Ly = input.PC1$Ly, Lt = input.PC1$Lt)

# saving the FPC1 output
save(PC1, file = "data/PC1_FPCA_BAAP.rda")

# understanding FPC1/PC1
## eigenvalues
PC1$lambda
## [1] 36.3817095 15.4263673  7.1191478  3.0323552  0.7206187
## the cumulative percentage of variance explained by the eigenvalue
PC1$cumFVE
## [1] 0.5793283 0.8249719 0.9383345 0.9866205 0.9980954
## fPC1: 0.5793283
## fPC2: 0.2456436
## fPC3: 0.1133626
## fPC4: 0.048286


## PC scores -> each row is 1 token, each column is one PC
# PC1$xiEst

## plot
plot(PC1)

## scree plot
# CreateScreePlot(PC1)

## path plot
# CreatePathPlot(PC1, xlab = "normalised time", ylab = "PC1 (tongue body movement)")

# the input data to FPCA() (just in case you want to check the specific input data you used)
# PC1$inputData$Lt

3.2 join PC scores with data + plot

# load the fPCA results for PC1 (object `PC1`, saved in section 3.1)
load(file = "data/PC1_FPCA_BAAP.rda")

# function: extract per-token fPC scores from an FPCA object
# Input:  fpcaObj — an object returned by fdapace::FPCA()
# Output: data frame with one row per token — the token id ("exclude_key")
#         followed by one column per estimated score ("PC1", "PC2", ...)
get_pc_scores <- function(fpcaObj){
  scores <- data.frame(fpcaObj$xiEst)          # score matrix -> data frame
  tokens <- names(fpcaObj$inputData$Lt)        # token ids, in input order
  out <- cbind(tokens, scores)
  # name the columns: token id + one "PCk" per eigenvalue
  names(out) <- c("exclude_key", paste0("PC", seq_along(fpcaObj$lambda)))
  out
}

# get PC scores w/ token info
pc1_df <- get_pc_scores(PC1)

# join PCs (dat) with selected cols from original data frame 
## store meta info
meta <- pca.result.350 %>% 
  select(speaker, L1, prompt, segment, vowel, exclude_key)

## merge the list and meta data - unique(meta) because otherwise there would be lots of duplicates
## (pca.result.350 has many rows per token — one per time sample — while the
##  meta columns are constant within a token, so unique() collapses them)
dat.PC1 <- left_join(pc1_df, unique(meta), by = "exclude_key")

# PC scores clustering per group
# scatter of fPC1 vs fPC2 scores per token with 95% ellipses per vowel,
# faceted by segment x L1 (Japanese tap /Éľ/ excluded by the filter)
PC1.scatter <- dat.PC1 %>% 
  filter(segment %in% c("/l/", "/Éą/")) %>% 
  # filter(group %in% c("advanced", "English")) %>% 
  ggplot2::ggplot() +
  aes(x = PC1, y = PC2, colour = vowel, shape = vowel) +
  geom_point(alpha = 0.3, size = 2, show.legend = FALSE) +
  stat_ellipse(aes(color = vowel, linetype = vowel), level = 0.95, lwd = 1.2) +
  facet_grid(segment ~ L1) +
  scale_color_manual(values = c("blue4", "brown4", "darkolivegreen")) +
  # ggtitle("Tongue body movement (PC1) for English and Japanese liquids") +
  guides(linetype = "none") +
  theme_classic() +
  theme(legend.text = element_text(size = 30),
        legend.key.size = unit(1, 'cm'),
        legend.position = "bottom",
        legend.title = element_text(size = 30),
        axis.text = element_text(size = 15),
        axis.title = element_text(size = 20),
        plot.title = element_text(size = 20, hjust = 0.5, face = "bold"),
        strip.text.x = element_text(size = 30),
        strip.text.y = element_text(angle = 0, size = 30)
  ) +
  # ylim(c(-4, 4)) +
  labs(x = "FPC1", y = "FPC2", colour = "Adjacent vowel")

PC1.scatter

ggsave(PC1.scatter, filename = "figure/PC1_scatter.png", width = 15, height = 10, dpi = 1000)

3.3 tracking dynamic pca

## PC1
# dynamic PC1 trajectories over proportional time, faceted by segment x L1:
# thin paths = individual tokens, thick lines = smoothed trend per vowel,
# dashed verticals = mean liquid onset and mean vowel onset.
# (A group_by(speaker, prompt, repetition) immediately before ggplot() was
#  removed: ggplot2 does not use dplyr grouping, so it was a no-op.)
PC1.dyn.350 <- pca.result.350 %>%
  ggplot() +
  geom_path(aes(x = proportional_time, y = PC1z, colour = vowel, group = rec_date), alpha = 0.03) +
  # geom_smooth(colour = "black", linewidth = 3, se = FALSE, show.legend = TRUE) +
  geom_smooth(aes(x = proportional_time, y = PC1z, colour = vowel, group = vowel), linewidth = 2, se = FALSE, show.legend = TRUE) +
  stat_smooth(aes(x = proportional_time, y = PC1z, colour = vowel, group = vowel), method = "gam", geom = "ribbon", fill = NA, linewidth = 0.5, linetype = 3, show.legend = FALSE) +
  # facet_wrap(segment ~ vowel, ncol = 2) +
  facet_grid(segment ~ L1) +
  # ggtitle("PC1 (Onset: -350 ms)") +
  geom_hline(yintercept = 0, linetype = 1, linewidth = 0.1) +
  geom_vline(aes(xintercept = mean(vowel_start_prop)), linetype = 2, linewidth = 0.5) + 
  # geom_text(aes(x = mean(vowel_start_prop), y = 1.3), label = "vowel\nonset", colour = "Black", size = 5) +  
  geom_vline(aes(xintercept = mean(acoustic_start_prop)), linetype = 2, linewidth = 0.5) + 
  # geom_text(aes(x = mean(acoustic_start_prop), y = 1.3), label = "liquid\nonset", colour = "Black", size = 5) +  
  scale_color_manual(values = c("blue4", "brown4", "darkolivegreen")) +
  theme_classic() +
  theme(legend.text = element_text(size = 30),
        legend.key.size = unit(1, 'cm'),
        legend.position = "bottom",
        legend.title = element_text(size = 30),
        axis.text = element_text(size = 15),
        axis.title = element_text(size = 20),
        plot.title = element_text(size = 20, hjust = 0.5, face = "bold"),
        strip.text.x = element_text(size = 20),
        strip.text.y = element_text(angle = 0, size = 20)
  ) +
  ylim(c(-4, 4)) +
  labs(x = "Proportional time (%)", y = "PC1 (z-score)", colour = "Adjacent vowel") 

PC1.dyn.350

ggsave(PC1.dyn.350, filename = "figure/dynamic_PC1.jpg", width = 15, height = 10, dpi = 1000)

3.4 plotting FPC scores

# function: perturb the mean curve along one eigenfunction
# Returns the mean trajectory shifted by Q standard deviations (the sd of
# fPC k is sqrt of its eigenvalue lambda[k]) along eigenfunction k.
perturbation <- function(fpcaObj, Q, k){
  shift <- Q * sqrt(fpcaObj$lambda[k]) # signed sd multiplier for fPC k
  fpcaObj$mu + shift * fpcaObj$phi[, k]
}

# function: build a data frame of the mean curve and its ±Q sd perturbations
# for a single PC (columns: time, mean, Qplus, Qminus, PC label).
# Can be validated against fdapace::GetMeanCurve and fdapace::CreateModeOfVarPlot.
perturbation_object <- function(fpcaObj, Q, k){
  out <- data.frame(
    time   = fpcaObj$workGrid,              # grid of time values
    mean   = fpcaObj$mu,                    # mean trajectory
    Qplus  = perturbation(fpcaObj, Q, k),   # mean + Q sd along fPC k
    Qminus = perturbation(fpcaObj, -Q, k)   # mean - Q sd along fPC k
  )
  out$PC <- paste0("PC", k)                 # label rows with the PC they describe
  out
}

# function: stack the mean ±Q sd perturbation frames for every PC into one
# long data frame.
# to do: add ability to pass a list of Q values for a gradient perturbation function
get_perturbation <- function(fpcaObj, Q){
  pcs <- seq_along(fpcaObj$lambda) # one perturbation frame per eigenvalue
  frames <- lapply(pcs, function(k) perturbation_object(fpcaObj, Q, k))
  dplyr::bind_rows(frames)         # unnest the list into a single df
}

# get mean trajectory and ±2 sd for all PCs
p_PC1 <- get_perturbation(PC1, Q = 2)

3.5 perturbation plot

# Manually calculating proportional time for liquid onset and offset for plotting
# (mean proportional time of the acoustic liquid onset and of the vowel onset)
pca.result.350 %>% 
  ungroup() %>% 
  summarise(mean_start = mean(acoustic_start_prop),
            mean_end = mean(vowel_start_prop))
## # A tibble: 1 Ă— 2
##   mean_start mean_end
##        <dbl>    <dbl>
## 1       56.7     68.2
# plot data,  perturbation + PC scores ------------------------------------
# perturbation plot: mean trajectory with +2 sd (red +) and -2 sd (blue -)
# for fPC1 and fPC2; dashed verticals mark mean liquid and vowel onsets
pc1_perturbation <- p_PC1 %>% 
  filter(PC %in% c("PC1", "PC2")) %>% 
  mutate(
    # relabel for the facet strips: emphasise these are functional PCs
    fPC = case_when(
      PC == "PC1" ~ "fPC1",
      PC == "PC2" ~ "fPC2"
    )
  ) %>% 
  ggplot2::ggplot() +
  aes(x = time, y = mean) +
  geom_path() +
  geom_point(aes(y = Qplus), shape = 3, size = 3, colour = "red") +
  geom_point(aes(y = Qminus), shape = 95, size = 5, colour = "blue") +
  xlab("Proportional Time (%)") + 
  ylab("PC") +
  # NOTE(review): passing the full pca.result.350 draws one identical vline
  # per row; precomputing the two means would be cheaper but look the same
  geom_vline(data = pca.result.350, aes(xintercept = mean(acoustic_start_prop)), linetype = 2) +
  geom_vline(data = pca.result.350, aes(xintercept = mean(vowel_start_prop)), linetype = 2) +
  facet_wrap(~ fPC, ncol = 2) +
  scale_color_manual(values = c("blue4", "brown4", "darkolivegreen")) +
  theme_classic() +
  theme(legend.text = element_text(size = 30),
        legend.key.size = unit(1, 'cm'),
        legend.position = "bottom",
        legend.title = element_text(size = 30),
        axis.text = element_text(size = 15),
        axis.title = element_text(size = 20),
        plot.title = element_text(size = 20, hjust = 0.5, face = "bold"),
        strip.text.x = element_text(size = 20),
        strip.text.y = element_text(angle = 0, size = 20)
  ) +
  labs(x = "Proportional Time(%)", y = "fPC")

pc1_perturbation

ggsave(pc1_perturbation, filename = "figure/perturbation_PC1.jpg", width = 15, height = 5, dpi = 1000)

3.6 FPCA reconstruction

3.6.1 preparation

# mean fPC1 trajectory
# pc1_mean_curve <- fdapace::GetMeanCurve(Ly = input.PC1$Ly, Lt = input.PC1$Lt, optns = list(plot = TRUE))

pc1_mu_values <- data.frame(PC1$mu) # mean curve values
pc1_mu_time <- data.frame(PC1$workGrid) # timepoints used for estimating the curve
pc1_phi <- data.frame(PC1$phi) # eigenfunction at each timepoint: workGrid * nlambda (e.g., 255 = 51 workGrid * 5 lambda)
pc1_lambda <- data.frame(PC1$lambda) # PC loadings for each PC: currently 5

# create a data frame containing mean curve, time and eigenfunctions associated with each PC at each time point
## add an extra column 'col_number' as a common index across the data frames - useful when merging everything together later on
### mean curve
pc1_mu_values <- pc1_mu_values %>% 
  mutate(
    col_number = row_number()
  )

### sampling time points
pc1_mu_time <- pc1_mu_time %>% 
  mutate(
    col_number = row_number()
  )

### eigenfunction
pc1_phi <- pc1_phi %>% 
  mutate(
    col_number = row_number()
  )

### pc loadings
# reshape the eigenvalues into one wide row (PC1_lambda ... PC5_lambda) and
# repeat it once per workGrid point (51) so it joins cleanly on col_number
pc1_lambda <- pc1_lambda %>% 
  mutate(
    PC = str_c("PC", row_number()),
    PC = str_c(PC, "lambda", sep = "_")
  ) %>% 
  pivot_wider(names_from = "PC", values_from = "PC1.lambda") %>% 
  slice(rep(1:n(), each = 51)) %>% 
  mutate(
    col_number = row_number()
  )
  

## merging all data together one by one (all keyed on col_number, i.e. the
## index of the workGrid time point)
PC1.rec <- left_join(pc1_mu_values, pc1_mu_time, by = "col_number")
PC1.rec <- left_join(PC1.rec, pc1_phi, by = "col_number")
PC1.rec <- left_join(PC1.rec, pc1_lambda, by = "col_number")

## tidying up some column names
# X1..X5 are the five eigenfunction columns that came from PC1$phi
PC1.rec <- PC1.rec %>% 
  select(col_number, PC1.workGrid, PC1.mu, X1, X2, X3, X4, X5, PC1_lambda, PC2_lambda, PC3_lambda, PC4_lambda, PC5_lambda) %>% 
  rename(
    mean = PC1.mu,
    time = PC1.workGrid,
    PC1_eigen = X1,
    PC2_eigen = X2,
    PC3_eigen = X3,
    PC4_eigen = X4,
    PC5_eigen = X5
  )

## plotting the eigenfunctions - this should match with a sub-plot in bottom right created with plot(PC1)
PC1.rec %>% 
  ggplot() +
  # geom_path(aes(x = time, y = mean)) +
  geom_path(aes(x = time, y = PC1_eigen), colour = "black", linewidth = 1.5) +
  geom_path(aes(x = time, y = PC2_eigen), colour = "red", linetype = 2, linewidth = 1.5) +
  geom_path(aes(x = time, y = PC3_eigen), colour = "darkgreen", linetype = 3, linewidth = 1.5) +
  # geom_path(aes(x = time, y = value, colour = pc)) +
  geom_hline(yintercept = 0) +
  labs(x = "time", y = "eigenfunctions", title = "First 3 eigenfunctions")

## check if this matches plot(PC1) (sanity check against fdapace's own plot)
plot(PC1)

# PC scores -> each row is 1 token, each column is one PC
# head(PC1$xiEst)

# PC scores have already been added to the main data set
# head(dat.PC1)

# duplicate each row by 51 times 
# (one copy per workGrid time point, so the curve data can be attached below)
dat.PC1.time <- dat.PC1 %>% 
  slice(rep(1:n(), each = 51))

# add col_names to merge with the other data frame
# NOTE(review): row_number() within exclude_key yields 1..51 only if each
# token appears exactly once in dat.PC1 — confirm there are no duplicates
dat.PC1.time <- dat.PC1.time %>% 
  group_by(exclude_key) %>% 
  mutate(
    col_number = row_number()
  ) %>% 
  ungroup()

# merge: every token row picks up the PC1.rec row for its time point
# (many-to-one join on col_number, as intended)
dat.PC1.time <- left_join(dat.PC1.time, PC1.rec, by = "col_number")

3.6.2 visualisation: reconstructed PC1 curves based on FPC1

pca.result.350.BAAP <- pca.result.350 %>% 
  filter(segment %in% c("/l/", "/Éą/"))

# Reconstructed PC1 trajectories: for each token, score_k * eigenfunction_k
# + mean curve gives the trajectory captured by FPC k (only the FPC1
# reconstruction is plotted below).
rec.PC1.fPC1 <- dat.PC1.time %>% 
  mutate(
    PC1_reconstruct = PC1 * PC1_eigen + mean,
    PC2_reconstruct = PC2 * PC2_eigen + mean,
    PC3_reconstruct = PC3 * PC3_eigen + mean,
    PC4_reconstruct = PC4 * PC4_eigen + mean,
    PC5_reconstruct = PC5 * PC5_eigen + mean  # trailing comma removed: errors on older dplyr
  ) %>% 
  mutate(
    # display labels for the facet strips
    language = case_when(
      L1 == "English" ~ "L1 English",
      L1 == "Japanese" ~ "L1 Japanese"
    )
  ) %>% 
  filter(segment %in% c("/l/", "/Éą/")) %>% 
  ggplot() +
  # one thin line per token; low alpha keeps the overplotting readable
  geom_path(aes(x = time, y = PC1_reconstruct, group = exclude_key, colour = vowel), alpha = 0.2, show.legend = TRUE) +
  scale_color_manual(values = c("blue3", "brown2", "darkolivegreen")) +
  labs(x = "Proportional Time (%)", y = "Reconstructed PC1 scores from FPC1") +
  labs(title = "FPC1 for PC1") +
  geom_hline(yintercept = 0, linetype = 1, linewidth = 0.1) +
  # NOTE(review): mean() inside aes() is evaluated on the layer data per
  # facet panel -- confirm per-panel means are the intended averaging scope.
  geom_vline(data = pca.result.350.BAAP, aes(xintercept = mean(vowel_start_prop)), linetype = 2, linewidth = 0.5) +
  geom_vline(data = pca.result.350.BAAP, aes(xintercept = mean(acoustic_start_prop)), linetype = 2, linewidth = 0.5) +
  # legend keys at full opacity even though the paths are drawn at alpha 0.2
  guides(colour = guide_legend(override.aes = list(alpha = 1))) +
  facet_grid(segment ~ language) +
  theme_classic() +
  theme(legend.text = element_text(size = 30),
        legend.key.size = unit(2, 'cm'),
        legend.position = "bottom",
        legend.title = element_text(size = 30),
        axis.text = element_text(size = 15),
        axis.title = element_text(size = 20),
        plot.title = element_blank(),
        strip.text.x = element_text(size = 30),
        strip.text.y = element_text(angle = 0, size = 30)
  )

ggsave(rec.PC1.fPC1, filename = "figure/reconstructed_PC1_fPC1.jpg", width = 15, height = 10, dpi = 300)
# Raw trajectories and FPC1 reconstructions side by side, sharing one legend.
# NOTE(review): PC1.dyn.350 is defined later in this document (section 4.2);
# this chunk assumes it already exists in the session -- confirm chunk order.
raw.rec <- ggpubr::ggarrange(PC1.dyn.350, rec.PC1.fPC1, common.legend = TRUE, legend = "bottom")

raw.rec

ggsave(raw.rec, filename = "figure/traj_side.jpg", width = 25, height = 10, dpi = 500)

3.6.3 visualisation: with Japanese tap

# Raw PC1 trajectories including the Japanese tap (no segment filter).
## PC1
PC1.dyn.350.jp <- pca.result.350 %>%
  # filter(segment %in% c("/l/", "/Éą/")) %>% 
  # NOTE(review): ggplot ignores dplyr groups, so this group_by() has no
  # effect on the plot and could be dropped.
  group_by(speaker, prompt, repetition) %>% 
  ggplot() +
  # one thin line per recording; heavy alpha to cope with overplotting
  geom_path(aes(x = proportional_time, y = PC1z, colour = vowel, group = rec_date), alpha = 0.03) +
  # geom_smooth(colour = "black", linewidth = 3, se = FALSE, show.legend = TRUE) +
  geom_smooth(aes(x = proportional_time, y = PC1z, colour = vowel, group = vowel), linewidth = 2, se = FALSE, show.legend = TRUE) +
  # dotted outline of the GAM smooth's ribbon (fill suppressed)
  stat_smooth(aes(x = proportional_time, y = PC1z, colour = vowel, group = vowel), method = "gam", geom = "ribbon", fill = NA, linewidth = 0.5, linetype = 3, show.legend = FALSE) +
  # facet_wrap(segment ~ vowel, ncol = 2) +
  facet_grid(segment ~ L1) +
  # ggtitle("PC1 (Onset: -350 ms)") +
  geom_hline(yintercept = 0, linetype = 1, linewidth = 0.1) +
  # dashed verticals at the mean vowel / liquid acoustic onsets
  geom_vline(aes(xintercept = mean(vowel_start_prop)), linetype = 2, linewidth = 0.5) + 
  # geom_text(aes(x = mean(vowel_start_prop), y = 1.3), label = "vowel\nonset", colour = "Black", size = 5) +  
  geom_vline(aes(xintercept = mean(acoustic_start_prop)), linetype = 2, linewidth = 0.5) + 
  # geom_text(aes(x = mean(acoustic_start_prop), y = 1.3), label = "liquid\nonset", colour = "Black", size = 5) +  
  scale_color_manual(values = c("blue4", "brown4", "darkolivegreen")) +
  theme_classic() +
  theme(legend.text = element_text(size = 30),
        legend.key.size = unit(1, 'cm'),
        legend.position = "bottom",
        legend.title = element_text(size = 30),
        axis.text = element_text(size = 15),
        axis.title = element_text(size = 20),
        plot.title = element_text(size = 30, hjust = 0.5, face = "bold"),
        strip.text.x = element_text(size = 30),
        strip.text.y = element_text(angle = 0, size = 30)
  ) +
  ylim(c(-4, 4)) +
  labs(x = "Proportional time (%)", y = "PC1 (z-score)", colour = "Adjacent vowel") 

# FPC1 reconstruction including the Japanese tap (no segment filter,
# unlike rec.PC1.fPC1 above); otherwise the same score * eigenfunction
# + mean reconstruction per token.
rec.PC1.fPC1.jp <- dat.PC1.time %>% 
  mutate(
    PC1_reconstruct = PC1 * PC1_eigen + mean,
    PC2_reconstruct = PC2 * PC2_eigen + mean,
    PC3_reconstruct = PC3 * PC3_eigen + mean,
    PC4_reconstruct = PC4 * PC4_eigen + mean,
    PC5_reconstruct = PC5 * PC5_eigen + mean  # trailing comma removed: errors on older dplyr
  ) %>% 
  mutate(
    # display labels for the facet strips
    language = case_when(
      L1 == "English" ~ "L1 English",
      L1 == "Japanese" ~ "L1 Japanese"
    )
  ) %>% 
  ggplot() +
  geom_path(aes(x = time, y = PC1_reconstruct, group = exclude_key, colour = vowel), alpha = 0.2, show.legend = TRUE) +
  scale_color_manual(values = c("blue3", "brown2", "darkolivegreen")) +
  labs(x = "Proportional Time (%)", y = "Reconstructed PC1 scores from FPC1") +
  labs(title = "FPC1 for PC1") +
  geom_hline(yintercept = 0, linetype = 1, linewidth = 0.1) +
  # NOTE(review): mean() inside aes() is evaluated on the layer data per
  # facet panel -- confirm per-panel means are the intended averaging scope.
  geom_vline(data = pca.result.350, aes(xintercept = mean(vowel_start_prop)), linetype = 2, linewidth = 0.5) +
  geom_vline(data = pca.result.350, aes(xintercept = mean(acoustic_start_prop)), linetype = 2, linewidth = 0.5) +
  # legend keys at full opacity even though the paths are drawn at alpha 0.2
  guides(colour = guide_legend(override.aes = list(alpha = 1))) +
  facet_grid(segment ~ language) +
  theme_classic() +
  theme(legend.text = element_text(size = 30),
        legend.key.size = unit(2, 'cm'),
        legend.position = "bottom",
        legend.title = element_text(size = 30),
        axis.text = element_text(size = 15),
        axis.title = element_text(size = 20),
        plot.title = element_blank(),
        strip.text.x = element_text(size = 30),
        strip.text.y = element_text(angle = 0, size = 30)
  )

# raw data and reconstructed trajectories side by side
raw.rec.jp <- ggpubr::ggarrange(PC1.dyn.350.jp, rec.PC1.fPC1.jp, common.legend = TRUE, legend = "bottom")

raw.rec.jp

ggsave(raw.rec.jp, filename = "figure/traj_side_jp.jpg", width = 25, height = 10, dpi = 500)

4 Other visualisation

4.1 PCA tongue plot

# PC1 tongue-shape plot: mean contour (solid), with the PC1 maximum contour
# (dashed, "+" point markers) and PC1 minimum contour (dotted, minus-sign
# point markers) overlaid.
PC1.350.plot.BAAP <- ggplot() +
  geom_path(data = estimate.350, aes(x = mean.x, y = mean.y), linewidth = 1.5) +
  geom_path(data = estimate.350, aes(x = PC1.max.x, y = PC1.max.y), linewidth = 1, alpha = 0.5, linetype = "dashed") +
  geom_path(data = estimate.350, aes(x = PC1.min.x, y = PC1.min.y), linewidth = 1, alpha = 0.5, linetype = "dotted") +
  # shape = 3 is a plus sign; "\u2212" is a typographic minus sign
  geom_point(data = estimate.350, aes(x = PC1.max.x, y = PC1.max.y), shape = 3, size = 3, stroke = 2) +
  geom_point(data = estimate.350, aes(x = PC1.min.x, y = PC1.min.y), shape = "\u2212", size = 5, stroke = 8) +
  xlab("X") + ylab("Y") +
  ggtitle("PC1") +
  theme_classic() +
  # ylim(-35, 25) +
  theme(plot.title = element_text(size = 30, face = "bold"),
        legend.position = "top",
        legend.title = element_blank(),
        axis.text = element_text(size = 15),
        axis.title = element_text(size = 15),
        strip.text.x = element_text(size = 15)
  )


# PC2 tongue-shape plot: mean contour (solid), with the PC2 maximum contour
# (dashed, "+" markers) and PC2 minimum contour (dotted, minus-sign markers).
PC2.350.plot.BAAP <- ggplot() +
  geom_path(data = estimate.350, aes(x = mean.x, y = mean.y), linewidth = 1.5) +
  geom_path(data = estimate.350, aes(x = PC2.max.x, y = PC2.max.y), linewidth = 1, alpha = 0.5, linetype = "dashed") +
  geom_path(data = estimate.350, aes(x = PC2.min.x, y = PC2.min.y), linewidth = 1, alpha = 0.5, linetype = "dotted") +
  geom_point(data = estimate.350, aes(x = PC2.max.x, y = PC2.max.y), shape = 3, size = 3, stroke = 2) +
  geom_point(data = estimate.350, aes(x = PC2.min.x, y = PC2.min.y), shape = "\u2212", size = 5, stroke = 8) +
  xlab("X") + ylab("Y") +
  ggtitle("PC2") +
  theme_classic() +
  theme(plot.title = element_text(size = 30, face = "bold"),
        legend.position = "top",
        legend.title = element_blank(),
        axis.text = element_text(size = 15),
        axis.title = element_text(size = 15),
        strip.text.x = element_text(size = 15)  # trailing comma removed for consistency with the PC1 plot
  )

# Stack the PC1 and PC2 tongue plots vertically and save.
pc.plot.2 <- ggpubr::ggarrange(PC1.350.plot.BAAP, PC2.350.plot.BAAP, ncol = 1)

pc.plot.2

ggsave(pc.plot.2, filename = "figure/PC1_PC2.jpg", width = 8, height = 12, dpi = 1000, bg = "transparent")

4.2 dynamic PC

## PC1: raw z-scored PC1 trajectories over proportional time for /l/ and /Éą/,
## one faint line per token, with per-vowel GAM smooths and dashed verticals
## at the mean liquid/vowel onsets; facetted by segment and speaker L1.
PC1.dyn.350 <- pca.result.350 %>%
  filter(segment %in% c("/l/", "/Éą/")) %>% 
  # NOTE(review): ggplot ignores dplyr groups, so this group_by() has no
  # effect on the plot and could be dropped.
  group_by(speaker, prompt, repetition) %>% 
  ggplot() +
  geom_path(aes(x = proportional_time, y = PC1z, colour = vowel, group = rec_date), alpha = 0.03) +
  # geom_smooth(colour = "black", linewidth = 3, se = FALSE, show.legend = TRUE) +
  geom_smooth(aes(x = proportional_time, y = PC1z, colour = vowel, group = vowel), linewidth = 2, se = FALSE, show.legend = TRUE) +
  # dotted outline of the GAM smooth's ribbon (fill suppressed)
  stat_smooth(aes(x = proportional_time, y = PC1z, colour = vowel, group = vowel), method = "gam", geom = "ribbon", fill = NA, linewidth = 0.5, linetype = 3, show.legend = FALSE) +
  # facet_wrap(segment ~ vowel, ncol = 2) +
  facet_grid(segment ~ L1) +
  # ggtitle("PC1 (Onset: -350 ms)") +
  geom_hline(yintercept = 0, linetype = 1, linewidth = 0.1) +
  geom_vline(aes(xintercept = mean(vowel_start_prop)), linetype = 2, linewidth = 0.5) + 
  # geom_text(aes(x = mean(vowel_start_prop), y = 1.3), label = "vowel\nonset", colour = "Black", size = 5) +  
  geom_vline(aes(xintercept = mean(acoustic_start_prop)), linetype = 2, linewidth = 0.5) + 
  # geom_text(aes(x = mean(acoustic_start_prop), y = 1.3), label = "liquid\nonset", colour = "Black", size = 5) +  
  scale_color_manual(values = c("blue4", "brown4", "darkolivegreen")) +
  theme_classic() +
  theme(legend.text = element_text(size = 30),
        legend.key.size = unit(1, 'cm'),
        legend.position = "bottom",
        legend.title = element_text(size = 30),
        axis.text = element_text(size = 15),
        axis.title = element_text(size = 20),
        plot.title = element_text(size = 30, hjust = 0.5, face = "bold"),
        strip.text.x = element_text(size = 30),
        strip.text.y = element_text(angle = 0, size = 30)
  ) +
  ylim(c(-4, 4)) +
  labs(x = "Proportional time (%)", y = "PC1 (z-score)", colour = "Adjacent vowel") 

PC1.dyn.350

ggsave(PC1.dyn.350, filename = "figure/dynamic_PC1.jpg", width = 20, height = 10, dpi = 500)

4.3 violin plot for abstract

# Violin + boxplot summary of per-token fPC1 (PC1) scores by adjacent vowel,
# facetted by segment and speaker L1 (figure prepared for the abstract).
PC1.fPC1.violinplot.BAAP <- dat.PC1 %>% 
  ggplot2::ggplot(aes(x = vowel, y = PC1, fill = vowel)) +
  # distribution shape (violin) with a narrow white boxplot on top
  geom_violin(show.legend = FALSE, alpha = 0.5) +
  geom_boxplot(show.legend = FALSE, alpha = 0.7, width = 0.3, fill = "white") +
  geom_hline(yintercept = 0, linetype = 2) +
  scale_fill_manual(values = c("blue4", "brown4", "darkolivegreen")) +
  facet_grid(segment ~ L1) +
  theme_classic() +
  theme(
    legend.text = element_text(size = 30),
    legend.key.size = unit(1, 'cm'),
    legend.position = "bottom",
    legend.title = element_text(size = 30),
    axis.text = element_text(size = 15),
    axis.title = element_text(size = 20),
    plot.title = element_text(size = 20, hjust = 0.5, face = "bold"),
    strip.text.x = element_text(size = 20),
    strip.text.y = element_text(angle = 0, size = 20)
  ) +
  labs(x = "Adjacent vowels", y = "fPC1")

PC1.fPC1.violinplot.BAAP

4.4 illustrating FPCA: slide 8

## Raw PC1 trajectories pooled across facets (slide 8 illustration):
## same data as PC1.dyn.350 but without facetting or axis titles, with a
## white halo under each coloured smooth for contrast on slides.
dyn.all <- pca.result.350 %>%
  filter(segment %in% c("/l/", "/Éą/")) %>% 
  # NOTE(review): ggplot ignores dplyr groups; this group_by() is a no-op.
  group_by(speaker, prompt, repetition) %>% 
  ggplot() +
  geom_path(aes(x = proportional_time, y = PC1z, colour = vowel, group = rec_date), alpha = 0.03, show.legend = FALSE) +
  # geom_smooth(colour = "black", linewidth = 3, se = FALSE, show.legend = TRUE) +
  # white underlay drawn first so the coloured smooth sits on a halo
  geom_smooth(aes(x = proportional_time, y = PC1z, colour = vowel, group = vowel), colour = "white", alpha = 0.5, linewidth = 3, se = FALSE, show.legend = FALSE) +
    geom_smooth(aes(x = proportional_time, y = PC1z, colour = vowel, group = vowel), alpha = 0.5, linewidth = 2, se = FALSE, show.legend = FALSE) +
  stat_smooth(aes(x = proportional_time, y = PC1z, colour = vowel, group = vowel), method = "gam", geom = "ribbon", fill = NA, linewidth = 0.5, linetype = 3, show.legend = FALSE) +
  # facet_wrap(segment ~ vowel, ncol = 2) +
  # facet_grid(segment ~ L1) +
  # ggtitle("PC1 (Onset: -350 ms)") +
  geom_hline(yintercept = 0, linetype = 1, linewidth = 0.1) +
  geom_vline(aes(xintercept = mean(vowel_start_prop)), linetype = 2, linewidth = 0.5) + 
  # geom_text(aes(x = mean(vowel_start_prop), y = 1.3), label = "vowel\nonset", colour = "Black", size = 5) +  
  geom_vline(aes(xintercept = mean(acoustic_start_prop)), linetype = 2, linewidth = 0.5) + 
  # geom_text(aes(x = mean(acoustic_start_prop), y = 1.3), label = "liquid\nonset", colour = "Black", size = 5) +  
  scale_color_manual(values = c("blue4", "brown4", "darkolivegreen")) +
  theme_classic() +
  theme(legend.text = element_text(size = 30),
        legend.key.size = unit(1, 'cm'),
        legend.position = "bottom",
        legend.title = element_text(size = 30),
        axis.text = element_text(size = 15),
        # axis.title = element_text(size = 20),
        axis.title = element_blank(),
        plot.title = element_text(size = 30, hjust = 0.5, face = "bold"),
        strip.text.x = element_text(size = 30),
        strip.text.y = element_text(angle = 0, size = 30)
  ) +
  ylim(c(-4, 4)) +
  labs(x = "Proportional time (%)", y = "PC1", colour = "Adjacent vowel") 

## FPC1 perturbation plot: mean PC1 curve with point markers at Qplus ("+",
## red) and Qminus ("-", blue) -- presumably the mean perturbed up/down by
## FPC1; confirm against where p_PC1 is built.
pc1_perturbation_BAAP <- p_PC1 %>% 
  filter(PC == "PC1") %>% 
  # NOTE(review): single-branch case_when; after the filter above, fPC is
  # constant ("fPC1") for every remaining row.
  mutate(
    fPC = case_when(
      PC == "PC1" ~ "fPC1"
    )
  ) %>% 
  ggplot2::ggplot() +
  aes(x = time, y = mean) +
  geom_path() +
  geom_point(aes(y = Qplus), shape = 3, size = 3, colour = "red") +
  geom_point(aes(y = Qminus), shape = 95, size = 5, colour = "blue") +
  xlab("Proportional Time (%)") + 
  ylab("PC") +
  # dashed verticals at the mean liquid/vowel onsets
  geom_vline(data = pca.result.350, aes(xintercept = mean(acoustic_start_prop)), linetype = 2) +
  geom_vline(data = pca.result.350, aes(xintercept = mean(vowel_start_prop)), linetype = 2) +
  # facet_wrap(~ fPC, ncol = 2) +
  scale_color_manual(values = c("blue4", "brown4", "darkolivegreen")) +
  theme_classic() +
  theme(legend.text = element_text(size = 30),
        legend.key.size = unit(1, 'cm'),
        legend.position = "bottom",
        legend.title = element_text(size = 30),
        axis.text = element_text(size = 15),
        axis.title = element_text(size = 20),
        axis.title.y = element_blank(),
        plot.title = element_text(size = 20, hjust = 0.5, face = "bold"),
        strip.text.x = element_text(size = 20),
        strip.text.y = element_text(angle = 0, size = 20)
  ) +
  labs(x = "Proportional Time(%)", y = "PC1 captured by FPC1")

# Stack the raw trajectories above the perturbation plot for the slide.
illustration <- ggpubr::ggarrange(dyn.all, pc1_perturbation_BAAP, ncol = 1)

illustration

ggsave(illustration, filename = "figure/FPCA_illustration.jpg", width = 5, height = 5, dpi = 1000)

5 analysis 3: Bayesian hierarchical regression modelling

5.1 English L

5.1.1 data preparation

# English /l/ tokens only; rename the FPCA score columns PC* -> fPC* so the
# regression formulas below read as functional PC scores.
dat.PC1.EN.L <- dat.PC1 %>% 
  filter(segment == "/l/") %>% 
  rename(fPC1 = PC1, fPC2 = PC2, fPC3 = PC3, fPC4 = PC4, fPC5 = PC5)

# Recode vowel labels to bare letters and fix reference levels explicitly
# (vowel baseline = I, L1 baseline = English); convert the grouping
# variables to factors with unused levels dropped.
dat.PC1.EN.L <- dat.PC1.EN.L %>% 
  mutate(
    vowel = case_when(
      vowel == "/a/" ~ "A",
      vowel == "/i/" ~ "I",
      vowel == "/u/" ~ "U"
    ),
    vowel = factor(vowel, levels = c("I", "A", "U")),
    L1 = factor(L1, levels = c("English", "Japanese")),
    speaker = droplevels(as.factor(speaker)),
    prompt = droplevels(as.factor(prompt))
  )

# sanity check: the prompts retained after subsetting
levels(dat.PC1.EN.L$prompt)
## [1] "lamb"  "lamp"  "lap"   "leaf"  "leap"  "leave" "loom"  "lube"
# Weakly informative priors for the brms models:
#   Intercept ~ Normal(0, 20); fixed-effect slopes ~ Normal(0, 100) (wide);
#   group-level SDs and residual sigma ~ Normal(0, 10) (effectively
#   half-normal since brms bounds them at zero -- confirm);
#   random-effect correlation matrices ~ LKJ(2).
b1_prior <- c(
  brms::set_prior("normal(0, 20)", class = "Intercept"),
  brms::set_prior("normal(0, 100)", class = "b"),
  brms::set_prior("normal(0, 10)", class = "sd"),
  brms::set_prior("normal(0, 10)", class = "sigma"),
  brms::set_prior("lkj(2)", class = "cor"))

5.1.2 full model

# Posterior summary of the full model (vowel, L1, and their interaction);
# the fitted brmsfit object is assumed to exist in the session (fit cached).
summary(PC1.FPC1.L.m1.BAAP)
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: fPC1 ~ vowel + L1 + L1:vowel + (1 + vowel | speaker) + (1 + L1 | prompt) 
##    Data: dat.PC1.EN.L (Number of observations: 1309) 
##   Draws: 4 chains, each with iter = 12000; warmup = 2000; thin = 1;
##          total post-warmup draws = 40000
## 
## Group-Level Effects: 
## ~prompt (Number of levels: 8) 
##                           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept)                 2.34      0.92     1.26     4.69 1.00    11954
## sd(L1Japanese)                2.18      0.92     1.15     4.50 1.00    12011
## cor(Intercept,L1Japanese)    -0.72      0.27    -0.98     0.01 1.00    14510
##                           Tail_ESS
## sd(Intercept)                14206
## sd(L1Japanese)               13232
## cor(Intercept,L1Japanese)    20002
## 
## ~speaker (Number of levels: 43) 
##                       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept)             2.81      0.34     2.23     3.57 1.00    11447
## sd(vowelA)                3.96      0.48     3.14     5.03 1.00     9436
## sd(vowelU)                2.74      0.38     2.09     3.56 1.00    12869
## cor(Intercept,vowelA)    -0.38      0.14    -0.62    -0.09 1.00     8383
## cor(Intercept,vowelU)    -0.21      0.16    -0.50     0.11 1.00    12803
## cor(vowelA,vowelU)        0.37      0.14     0.06     0.63 1.00    13500
##                       Tail_ESS
## sd(Intercept)            19876
## sd(vowelA)               17289
## sd(vowelU)               22618
## cor(Intercept,vowelA)    15819
## cor(Intercept,vowelU)    19553
## cor(vowelA,vowelU)       20638
## 
## Population-Level Effects: 
##                   Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept             3.59      1.64     0.35     6.80 1.00    11404    16164
## vowelA               -6.45      2.28   -11.01    -1.91 1.00    11409    16903
## vowelU               -3.01      2.44    -7.85     1.83 1.00    13893    16851
## L1Japanese            3.68      1.65     0.48     6.94 1.00    10210    14967
## vowelA:L1Japanese    -4.61      2.34    -9.19     0.03 1.00    10300    15646
## vowelU:L1Japanese    -2.59      2.37    -7.27     2.05 1.00    12543    16216
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     2.49      0.05     2.39     2.59 1.00    45073    30272
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

5.1.3 model comparison

# Print m2: same as m1 but without the L1:vowel interaction.
PC1.FPC1.L.m2.BAAP
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: fPC1 ~ vowel + L1 + (1 + vowel | speaker) + (1 + L1 | prompt) 
##    Data: dat.PC1.EN.L (Number of observations: 1309) 
##   Draws: 4 chains, each with iter = 12000; warmup = 2000; thin = 1;
##          total post-warmup draws = 40000
## 
## Group-Level Effects: 
## ~prompt (Number of levels: 8) 
##                           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept)                 2.54      0.98     1.34     4.99 1.00     9955
## sd(L1Japanese)                2.45      0.88     1.29     4.66 1.00     8655
## cor(Intercept,L1Japanese)    -0.72      0.28    -0.98     0.06 1.00     7932
##                           Tail_ESS
## sd(Intercept)                13776
## sd(L1Japanese)               17405
## cor(Intercept,L1Japanese)    12010
## 
## ~speaker (Number of levels: 43) 
##                       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept)             2.82      0.35     2.23     3.59 1.00     9570
## sd(vowelA)                4.01      0.50     3.16     5.11 1.00     6847
## sd(vowelU)                2.75      0.38     2.10     3.57 1.00    11585
## cor(Intercept,vowelA)    -0.38      0.14    -0.63    -0.09 1.00     7201
## cor(Intercept,vowelU)    -0.21      0.16    -0.50     0.11 1.00    11265
## cor(vowelA,vowelU)        0.38      0.15     0.07     0.64 1.00    11803
##                       Tail_ESS
## sd(Intercept)            16942
## sd(vowelA)               14884
## sd(vowelU)               20773
## cor(Intercept,vowelA)    13754
## cor(Intercept,vowelU)    18475
## cor(vowelA,vowelU)       18025
## 
## Population-Level Effects: 
##            Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept      5.09      1.58     2.00     8.23 1.00     8379    11767
## vowelA        -9.76      2.00   -13.30    -5.32 1.00     8109     8293
## vowelU        -4.91      1.85    -8.38    -0.88 1.00    10129     9872
## L1Japanese     1.59      1.29    -0.97     4.11 1.00     9311    15721
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     2.49      0.05     2.39     2.59 1.00    45147    31610
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Print m3: L1 only (no vowel fixed effect; intercept-only speaker effect).
PC1.FPC1.L.m3.BAAP
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: fPC1 ~ L1 + (1 | speaker) + (1 + L1 | prompt) 
##    Data: dat.PC1.EN.L (Number of observations: 1309) 
##   Draws: 4 chains, each with iter = 12000; warmup = 2000; thin = 1;
##          total post-warmup draws = 40000
## 
## Group-Level Effects: 
## ~prompt (Number of levels: 8) 
##                           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept)                 4.43      1.51     2.47     8.24 1.00    10295
## sd(L1Japanese)                3.28      1.18     1.79     6.24 1.00    10834
## cor(Intercept,L1Japanese)     0.21      0.31    -0.43     0.73 1.00    14075
##                           Tail_ESS
## sd(Intercept)                18479
## sd(L1Japanese)               18079
## cor(Intercept,L1Japanese)    20740
## 
## ~speaker (Number of levels: 43) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     2.56      0.31     2.03     3.24 1.00     5916    12508
## 
## Population-Level Effects: 
##            Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept      0.41      1.78    -3.09     4.00 1.00     6039    11118
## L1Japanese     1.33      1.51    -1.67     4.30 1.00     6844    12942
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     3.19      0.06     3.07     3.32 1.00    42152    28696
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Print m4: vowel only (no L1 fixed effect; intercept-only prompt effect).
PC1.FPC1.L.m4.BAAP
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: fPC1 ~ vowel + (1 + vowel | speaker) + (1 | prompt) 
##    Data: dat.PC1.EN.L (Number of observations: 1309) 
##   Draws: 4 chains, each with iter = 12000; warmup = 2000; thin = 1;
##          total post-warmup draws = 40000
## 
## Group-Level Effects: 
## ~prompt (Number of levels: 8) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.36      0.68     0.63     3.11 1.00     9737    12529
## 
## ~speaker (Number of levels: 43) 
##                       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept)             3.26      0.38     2.61     4.10 1.00     6603
## sd(vowelA)                4.41      0.51     3.54     5.52 1.00     5127
## sd(vowelU)                2.90      0.38     2.23     3.74 1.00     9607
## cor(Intercept,vowelA)    -0.52      0.12    -0.72    -0.27 1.00     5416
## cor(Intercept,vowelU)    -0.36      0.14    -0.61    -0.06 1.00    10120
## cor(vowelA,vowelU)        0.49      0.13     0.21     0.70 1.00     9238
##                       Tail_ESS
## sd(Intercept)            12977
## sd(vowelA)                9231
## sd(vowelU)               16841
## cor(Intercept,vowelA)    10629
## cor(Intercept,vowelU)    17863
## cor(vowelA,vowelU)       17178
## 
## Population-Level Effects: 
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept     6.04      1.03     4.06     8.07 1.00     6860    11258
## vowelA       -9.51      1.44   -12.33    -6.73 1.00     6959    11431
## vowelU       -4.69      1.47    -7.60    -1.77 1.00    11067    13625
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     2.62      0.05     2.51     2.72 1.00    40182    30718
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Model comparison via Bayes factors (bridge-sampling marginal likelihoods):
# first, does the L1:vowel interaction improve on the additive model?
comparison.PC1.FPC1.L.BAAP.m1.m2 <- bayestestR::bayesfactor_models(PC1.FPC1.L.m1.BAAP, PC1.FPC1.L.m2.BAAP, denominator = PC1.FPC1.L.m1.BAAP) # denominator: the model against which comparison is performed

comparison.PC1.FPC1.L.BAAP.m1.m2
## Bayes Factors for Model Comparison
## 
##     Model                                                      BF
## [2] vowel + L1 + (1 + vowel | speaker) + (1 + L1 | prompt) 238.58
## 
## * Against Denominator: [1] vowel + L1 + L1:vowel + (1 + vowel | speaker) + (1 + L1 | prompt)
## *   Bayes Factor Type: marginal likelihoods (bridgesampling)
# plot the comparison (single pie, labelled with the BF)
plot(comparison.PC1.FPC1.L.BAAP.m1.m2, n_pies = "one", value = "BF")

# Second comparison: drop each fixed effect (vowel, L1) in turn from m2.
comparison.fixed.effect.PC1.FPC1.L.BAAP <- bayestestR::bayesfactor_models(PC1.FPC1.L.m2.BAAP, PC1.FPC1.L.m3.BAAP, PC1.FPC1.L.m4.BAAP, denominator = PC1.FPC1.L.m2.BAAP)

comparison.fixed.effect.PC1.FPC1.L.BAAP
## Bayes Factors for Model Comparison
## 
##     Model                                              BF
## [2] L1 + (1 | speaker) + (1 + L1 | prompt)       9.02e-92
## [3] vowel + (1 + vowel | speaker) + (1 | prompt) 2.57e-21
## 
## * Against Denominator: [1] vowel + L1 + (1 + vowel | speaker) + (1 + L1 | prompt)
## *   Bayes Factor Type: marginal likelihoods (bridgesampling)
# plot
plot(comparison.fixed.effect.PC1.FPC1.L.BAAP, n_pies = "one", value = "BF")

5.1.4 saving model for future analysis

# Persist the four candidate models so later sessions can load() them
# without refitting; each object is saved by name under model/<name>.rda.
for (model_name in c("PC1.FPC1.L.m1.BAAP", "PC1.FPC1.L.m2.BAAP",
                     "PC1.FPC1.L.m3.BAAP", "PC1.FPC1.L.m4.BAAP")) {
  save(list = model_name, file = file.path("model", paste0(model_name, ".rda")))
}

5.1.5 visualising model

load(file = "model/PC1.FPC1.L.m1.BAAP.rda")

# Relabel the model's coded levels back to display form ("A" -> "/a/",
# "English" -> "L1 English", ...). Extracted into a helper because the same
# recode was previously duplicated for the draws and the HDI summary.
relabel_vowel_L1 <- function(df) {
  df %>% 
    mutate(
      vowel = case_when(
        vowel == "A" ~ "/a/",
        vowel == "I" ~ "/i/",
        vowel == "U" ~ "/u/"
      ),
      L1 = case_when(
        L1 == "English" ~ "L1 English",
        L1 == "Japanese" ~ "L1 Japanese"
      ))
}

# obtain posterior draws of the conditional means (epred scale)
post_data_L_PC1_FPC1_BAAP <- PC1.FPC1.L.m1.BAAP %>% 
  emmeans::emmeans(~ L1*vowel, epred = TRUE) %>% 
  tidybayes::gather_emmeans_draws()

# median + highest-density interval per cell, then relabel for plotting
post_data_L_PC1_FPC1_BAAP_hdi <- post_data_L_PC1_FPC1_BAAP %>% 
  tidybayes::median_hdi() %>% 
  relabel_vowel_L1()

# relabel the raw draws the same way
post_data_L_PC1_FPC1_BAAP <- relabel_vowel_L1(post_data_L_PC1_FPC1_BAAP)

# Posterior medians with 95% HDI error bars over the full posterior draw
# cloud (quasirandom beeswarm), by vowel, facetted by L1.
post_data_L_PC1_FPC1_BAAP_plot <- ggplot() +
  geom_point(data = post_data_L_PC1_FPC1_BAAP_hdi, aes(x = vowel, y = .value, colour = vowel, shape = vowel), size = 6, stroke = 2, show.legend = FALSE) +
  # `size` -> `linewidth` for line-based geoms (deprecated in ggplot2 3.4.0;
  # matches the rest of this file, which already uses linewidth)
  geom_errorbar(data = post_data_L_PC1_FPC1_BAAP_hdi, aes(x = vowel, y = .value, ymin = .lower, ymax = .upper), linewidth = 2, width = 0.8, show.legend = FALSE) +
  # very low alpha: each of the 40k draws contributes a faint point
  ggbeeswarm::geom_quasirandom(data = post_data_L_PC1_FPC1_BAAP, aes(x = vowel, y = .value, colour = vowel), alpha = 0.008, size = 0.01, width = 0.3, dodge.width = 0.3, show.legend = FALSE) +
  geom_hline(yintercept = 0, linewidth = 0.5, linetype = 2) +
  facet_wrap(~ factor(L1, levels = c("L1 English", "L1 Japanese"))) +
  scale_colour_manual(values = c("blue4", "brown4", "darkolivegreen")) +
  scale_y_continuous(limits = c(-20, 20)) +
  theme_classic() +
   theme(legend.text = element_text(size = 30),
        legend.key.size = unit(2, 'cm'),
        legend.position = "bottom",
        legend.title = element_text(size = 30),
        axis.text.x = element_text(size = 30),
        axis.title.x = element_blank(),
        axis.title.y = element_text(size = 20),
        plot.title = element_text(size = 30, hjust = 0, face = "bold"),
        strip.text.x = element_text(size = 30),
        strip.text.y = element_text(angle = 0, size = 30)
  ) +
  labs(x = "Vowel", y = "FPC1") +
  labs(title = "/l/: FPC1 for PC1")

post_data_L_PC1_FPC1_BAAP_plot

5.1.6 model details

5.1.7 quantifying vowel context contrast using emmeans

# quantifying contrasts
## Posterior (adjusted) means of fPC1 for each vowel within each L1.
PC1.FPC1.L.m1.BAAP.em <- emmeans::emmeans(PC1.FPC1.L.m1.BAAP,  ~ vowel|L1)
PC1.FPC1.L.m1.BAAP.em
## L1 = English:
##  vowel emmean lower.HPD upper.HPD
##  I      3.592     0.279     6.727
##  A     -2.856    -6.347     0.667
##  U      0.581    -3.454     4.532
## 
## L1 = Japanese:
##  vowel emmean lower.HPD upper.HPD
##  I      7.260     4.705     9.799
##  A     -3.795    -6.499    -1.119
##  U      1.674    -1.428     4.969
## 
## Point estimate displayed: median 
## HPD interval probability: 0.95
## All pairwise vowel contrasts within each L1; "tukey" here selects the
## all-pairwise contrast set (Bayesian model, so summaries are posterior).
PC1.FPC1.L.m1.BAAP.cont <- emmeans::contrast(PC1.FPC1.L.m1.BAAP.em, "tukey")
PC1.FPC1.L.m1.BAAP.cont
## L1 = English:
##  contrast estimate lower.HPD upper.HPD
##  I - A        6.44      1.96     11.05
##  I - U        3.00     -1.86      7.80
##  A - U       -3.44     -8.49      1.48
## 
## L1 = Japanese:
##  contrast estimate lower.HPD upper.HPD
##  I - A       11.04      7.51     14.74
##  I - U        5.59      1.58      9.45
##  A - U       -5.47     -9.65     -1.57
## 
## Point estimate displayed: median 
## HPD interval probability: 0.95
## Posterior draws for each pairwise contrast, with L1 relabelled for display.
PC1_FPC1_L_cont_BAAP_posterior <- tidybayes::gather_emmeans_draws(PC1.FPC1.L.m1.BAAP.cont) %>% 
  mutate(
    L1 = case_when(
      L1 == "English" ~ "L1 English",
      L1 == "Japanese" ~ "L1 Japanese"
    )
  )

# calculating probability of direction (pd)
# Share of posterior draws of a contrast falling on the expected side of
# zero. Extracted into a helper: the same computation was previously
# copy-pasted six times.
pd_contrast <- function(draws, l1, cont, direction = c("positive", "negative")) {
  direction <- match.arg(direction)
  v <- draws$.value[draws$L1 == l1 & draws$contrast == cont]
  # sum()/length() (not mean()) reproduces the original
  # length(which(...))/length(...) computation exactly, including how NA
  # draws would be counted in the denominator only.
  if (direction == "positive") {
    sum(v > 0, na.rm = TRUE) / length(v)
  } else {
    sum(v < 0, na.rm = TRUE) / length(v)
  }
}

## L1 English: I - A and I - U expected positive, A - U expected negative
EngIA_L_PC1_FPC1_BAAP_pd <- pd_contrast(PC1_FPC1_L_cont_BAAP_posterior, "L1 English", "I - A", "positive")
EngIU_L_PC1_FPC1_BAAP_pd <- pd_contrast(PC1_FPC1_L_cont_BAAP_posterior, "L1 English", "I - U", "positive")
EngAU_L_PC1_FPC1_BAAP_pd <- pd_contrast(PC1_FPC1_L_cont_BAAP_posterior, "L1 English", "A - U", "negative")

## L1 Japanese: same expected directions
JapIA_L_PC1_FPC1_BAAP_pd <- pd_contrast(PC1_FPC1_L_cont_BAAP_posterior, "L1 Japanese", "I - A", "positive")
JapIU_L_PC1_FPC1_BAAP_pd <- pd_contrast(PC1_FPC1_L_cont_BAAP_posterior, "L1 Japanese", "I - U", "positive")
JapAU_L_PC1_FPC1_BAAP_pd <- pd_contrast(PC1_FPC1_L_cont_BAAP_posterior, "L1 Japanese", "A - U", "negative")

### contrast probability of direction merged together (same order as the
### contrast summary rows below)
L_PC1_FPC1_contrast_BAAP <- c(EngIA_L_PC1_FPC1_BAAP_pd, EngIU_L_PC1_FPC1_BAAP_pd, EngAU_L_PC1_FPC1_BAAP_pd, JapIA_L_PC1_FPC1_BAAP_pd, JapIU_L_PC1_FPC1_BAAP_pd, JapAU_L_PC1_FPC1_BAAP_pd)

### merge contrast pd with hdi/median
L_PC1_FPC1_contrast_BAAP <- data.frame(PC1.FPC1.L.m1.BAAP.cont, L_PC1_FPC1_contrast_BAAP) %>% rename(
  PD = L_PC1_FPC1_contrast_BAAP
)

### results: rounded estimates with HDI bounds and probability of direction
L_PC1_FPC1_contrast_BAAP %>% 
  mutate(
    across(where(is.numeric), \(x) round(x, digits = 2)),
    contrast = factor(contrast, levels = c("A - U", "I - U", "I - A"))
  ) %>% 
  arrange(L1, contrast)
##   contrast       L1 estimate lower.HPD upper.HPD   PD
## 1    A - U  English    -3.44     -8.49      1.48 0.92
## 2    I - U  English     3.00     -1.86      7.80 0.91
## 3    I - A  English     6.44      1.96     11.05 0.99
## 4    A - U Japanese    -5.47     -9.65     -1.57 0.99
## 5    I - U Japanese     5.59      1.58      9.45 0.99
## 6    I - A Japanese    11.04      7.51     14.74 1.00

5.1.8 plotting vowel contrast

## plot: posterior contrast distributions for /l/ PC1/FPC1, faceted by L1
## fix: slab_alpha is a constant, so it is set as a stat parameter rather
## than mapped inside aes() — mapping a constant routes it through a scale
## and does not give a literal alpha of 0.4
PC1_FPC1_L_cont_posterior_BAAP_plot <- PC1_FPC1_L_cont_BAAP_posterior %>% 
  ggplot(aes(y = contrast, x = .value)) +
  tidybayes::stat_slab(aes(fill = after_stat(level)), slab_alpha = 0.4, point_interval = "median_hdi", .width = c(.89, .95, .99)) +
  tidybayes::stat_pointinterval(point_interval = "median_hdi", .width = c(.89, .95, .99)) +
  scale_fill_manual(values = c("darkolivegreen", "blue4",  "brown4"), na.translate = FALSE) +
  scale_x_continuous(limits = c(-20, 20)) +
  facet_wrap(~ L1) +
  # reference line at zero difference (spelled-out `linetype`, not `lty`)
  geom_vline(xintercept = 0, linetype = 2) +
  theme_classic() +
  guides(fill = guide_legend(override.aes = list(alpha = 0.4), title = "HDI")) +
  labs(x = "difference", title = "/l/: PC1/FPC1 contrast") +
  theme(legend.text = element_text(size = 30),
        legend.position = "bottom",
        legend.title = element_text(size = 30),
        axis.text = element_text(size = 15),
        axis.text.y = element_text(size = 25),
        axis.title = element_blank(),
        plot.title = element_text(size = 30, hjust = 0, face = "bold"),
        strip.text.x = element_text(size = 30),
        strip.text.y = element_text(angle = 0, size = 30))

PC1_FPC1_L_cont_posterior_BAAP_plot

5.1.9 slide plot

# stack the posterior plot above the contrast plot for the slide figure
L.BAAP <- ggarrange(
  post_data_L_PC1_FPC1_BAAP_plot,
  PC1_FPC1_L_cont_posterior_BAAP_plot,
  common.legend = FALSE,
  legend = "bottom",
  ncol = 1
)

L.BAAP

## saving plot
ggsave(filename = "figure/PC1_FPC1_L.png", plot = L.BAAP, width = 8, height = 10, dpi = 1000)

5.2 English R

5.2.1 data preparation

# subset English /Éą/ data, rename the functional PCs, and set up all
# factor variables in a single pipeline
dat.PC1.EN.R <- dat.PC1 %>% 
  filter(segment == "/Éą/") %>% 
  rename(
    fPC1 = PC1,
    fPC2 = PC2,
    fPC3 = PC3,
    fPC4 = PC4,
    fPC5 = PC5
  ) %>% 
  mutate(
    # define the baseline level explicitly: I is the reference vowel
    vowel = factor(
      case_when(
        vowel == "/a/" ~ "A",
        vowel == "/i/" ~ "I",
        vowel == "/u/" ~ "U"
      ),
      levels = c("I", "A", "U")
    ),
    L1 = factor(L1, levels = c("English", "Japanese")),
    # convert other variables into factor and drop unused levels
    speaker = droplevels(as.factor(speaker)),
    prompt = droplevels(as.factor(prompt))
  )

levels(dat.PC1.EN.R$speaker)
##  [1] "2d57ke" "2drb3c" "2zy9tf" "3bcpyh" "3hsubn" "3pzrts" "3wy8us" "4ps8zx"
##  [9] "54i2ks" "5jzj2h" "5upwe3" "6p63jy" "7cd4t4" "bfwizh" "birw55" "bj8mjm"
## [17] "byxcff" "c5y8z6" "cdsju7" "cu2jce" "dbtzn2" "ds6umh" "fgd95u" "fkcwjr"
## [25] "h5s4x3" "heat7g" "hgrist" "i3wa7f" "j586ts" "jcy8xi" "kjn9n4" "m46dhf"
## [33] "m5r28t" "s6a8gh" "srky8g" "tay55n" "uig6n9" "ut4e5m" "we8z58" "xub9bc"
## [41] "z3n578" "zajk25" "zz3r2g"
levels(dat.PC1.EN.R$prompt)
## [1] "ram"   "ramp"  "rap"   "reap"  "reef"  "reeve" "room"  "rube"

5.2.2 full model

# summary of the full model (vowel, L1 and their interaction)
PC1.FPC1.R.m1.BAAP %>% summary()
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: fPC1 ~ vowel + L1 + L1:vowel + (1 + vowel | speaker) + (1 + L1 | prompt) 
##    Data: dat.PC1.EN.R (Number of observations: 1321) 
##   Draws: 4 chains, each with iter = 12000; warmup = 2000; thin = 1;
##          total post-warmup draws = 40000
## 
## Group-Level Effects: 
## ~prompt (Number of levels: 8) 
##                           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept)                 2.06      0.92     1.01     4.41 1.00    12759
## sd(L1Japanese)                1.43      0.71     0.58     3.21 1.00    14188
## cor(Intercept,L1Japanese)    -0.51      0.32    -0.94     0.27 1.00    24991
##                           Tail_ESS
## sd(Intercept)                15486
## sd(L1Japanese)               18151
## cor(Intercept,L1Japanese)    25564
## 
## ~speaker (Number of levels: 43) 
##                       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept)             3.36      0.40     2.67     4.25 1.00    11666
## sd(vowelA)                2.31      0.35     1.69     3.08 1.00    15536
## sd(vowelU)                2.98      0.44     2.22     3.93 1.00    13599
## cor(Intercept,vowelA)    -0.41      0.15    -0.66    -0.10 1.00    18748
## cor(Intercept,vowelU)    -0.34      0.15    -0.61    -0.03 1.00    16962
## cor(vowelA,vowelU)        0.28      0.17    -0.09     0.59 1.00    10883
##                       Tail_ESS
## sd(Intercept)            19801
## sd(vowelA)               24906
## sd(vowelU)               23525
## cor(Intercept,vowelA)    26098
## cor(Intercept,vowelU)    23882
## cor(vowelA,vowelU)       21106
## 
## Population-Level Effects: 
##                   Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept             1.23      1.59    -1.94     4.36 1.00    11058    17109
## vowelA               -3.23      1.99    -7.13     0.69 1.00    13949    17304
## vowelU               -3.64      2.20    -8.00     0.78 1.00    14560    18292
## L1Japanese            2.07      1.46    -0.84     4.95 1.00     9206    16575
## vowelA:L1Japanese    -8.86      1.55   -11.92    -5.78 1.00    14479    19235
## vowelU:L1Japanese    -3.54      1.79    -7.09     0.04 1.00    13903    19244
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     3.23      0.07     3.10     3.36 1.00    54803    29616
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

5.2.3 model comparison

# summary of the no-interaction model
print(PC1.FPC1.R.m2.BAAP)
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: fPC1 ~ vowel + L1 + (1 + vowel | speaker) + (1 + L1 | prompt) 
##    Data: dat.PC1.EN.R (Number of observations: 1321) 
##   Draws: 4 chains, each with iter = 12000; warmup = 2000; thin = 1;
##          total post-warmup draws = 40000
## 
## Group-Level Effects: 
## ~prompt (Number of levels: 8) 
##                           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept)                 2.81      1.40     1.21     6.44 1.00    11832
## sd(L1Japanese)                4.70      1.58     2.60     8.64 1.00    13471
## cor(Intercept,L1Japanese)    -0.35      0.46    -0.96     0.63 1.00     5298
##                           Tail_ESS
## sd(Intercept)                14587
## sd(L1Japanese)               21658
## cor(Intercept,L1Japanese)    10915
## 
## ~speaker (Number of levels: 43) 
##                       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept)             3.37      0.41     2.68     4.26 1.00    11063
## sd(vowelA)                2.33      0.36     1.70     3.10 1.00    13968
## sd(vowelU)                2.99      0.44     2.23     3.93 1.00    13094
## cor(Intercept,vowelA)    -0.42      0.15    -0.67    -0.10 1.00    17244
## cor(Intercept,vowelU)    -0.34      0.15    -0.61    -0.03 1.00    16272
## cor(vowelA,vowelU)        0.28      0.18    -0.09     0.60 1.00    10106
##                       Tail_ESS
## sd(Intercept)            18474
## sd(vowelA)               23274
## sd(vowelU)               22687
## cor(Intercept,vowelA)    23133
## cor(Intercept,vowelU)    24118
## cor(vowelA,vowelU)       18303
## 
## Population-Level Effects: 
##            Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept      2.50      2.45    -2.01     7.78 1.00     6946    12702
## vowelA        -5.80      4.03   -14.67     1.50 1.00     5240    10129
## vowelU        -4.63      2.67    -9.83     0.64 1.00     8753    14591
## L1Japanese    -2.20      2.03    -6.16     1.85 1.00    12534    18314
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     3.23      0.07     3.10     3.37 1.00    49223    31137
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
print(PC1.FPC1.R.m3.BAAP)
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: fPC1 ~ L1 + (1 | speaker) + (1 + L1 | prompt) 
##    Data: dat.PC1.EN.R (Number of observations: 1321) 
##   Draws: 4 chains, each with iter = 12000; warmup = 2000; thin = 1;
##          total post-warmup draws = 40000
## 
## Group-Level Effects: 
## ~prompt (Number of levels: 8) 
##                           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept)                 2.82      0.99     1.55     5.29 1.00    11605
## sd(L1Japanese)                5.05      1.65     2.88     9.16 1.00    11156
## cor(Intercept,L1Japanese)     0.29      0.30    -0.35     0.78 1.00    12671
##                           Tail_ESS
## sd(Intercept)                18571
## sd(L1Japanese)               18420
## cor(Intercept,L1Japanese)    19353
## 
## ~speaker (Number of levels: 43) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     2.85      0.34     2.27     3.61 1.00     6225    11811
## 
## Population-Level Effects: 
##            Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept     -0.86      1.32    -3.52     1.76 1.00     6943    11668
## L1Japanese    -2.14      2.11    -6.29     2.08 1.00     7262    12197
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     3.56      0.07     3.42     3.70 1.00    34942    28804
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
print(PC1.FPC1.R.m4.BAAP)
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: fPC1 ~ vowel + (1 + vowel | speaker) + (1 | prompt) 
##    Data: dat.PC1.EN.R (Number of observations: 1321) 
##   Draws: 4 chains, each with iter = 12000; warmup = 2000; thin = 1;
##          total post-warmup draws = 40000
## 
## Group-Level Effects: 
## ~prompt (Number of levels: 8) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.51      0.78     0.69     3.48 1.00    10468    11954
## 
## ~speaker (Number of levels: 43) 
##                       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept)             3.46      0.42     2.75     4.38 1.00     7433
## sd(vowelA)                4.85      0.57     3.85     6.10 1.00     5871
## sd(vowelU)                3.39      0.45     2.60     4.37 1.00     9626
## cor(Intercept,vowelA)    -0.40      0.13    -0.64    -0.12 1.00     5163
## cor(Intercept,vowelU)    -0.42      0.14    -0.66    -0.12 1.00     9245
## cor(vowelA,vowelU)        0.52      0.13     0.24     0.73 1.00     9303
##                       Tail_ESS
## sd(Intercept)            13430
## sd(vowelA)               11617
## sd(vowelU)               16583
## cor(Intercept,vowelA)    10699
## cor(Intercept,vowelU)    16581
## cor(vowelA,vowelU)       16793
## 
## Population-Level Effects: 
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept     2.61      1.12     0.42     4.79 1.00     8414    13685
## vowelA       -9.15      1.63   -12.28    -6.06 1.00     8214    12966
## vowelU       -6.02      1.65    -9.25    -2.78 1.00    12000    14690
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     3.26      0.07     3.13     3.40 1.00    47049    28256
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Model comparison using Bayes Factor: Interaction
## denominator: the model against which comparison is performed
comparison.PC1.FPC1.R.m1.m2.BAAP <- bayestestR::bayesfactor_models(
  PC1.FPC1.R.m1.BAAP,
  PC1.FPC1.R.m2.BAAP,
  denominator = PC1.FPC1.R.m1.BAAP
)

comparison.PC1.FPC1.R.m1.m2.BAAP
## Bayes Factors for Model Comparison
## 
##     Model                                                     BF
## [2] vowel + L1 + (1 + vowel | speaker) + (1 + L1 | prompt) 0.802
## 
## * Against Denominator: [1] vowel + L1 + L1:vowel + (1 + vowel | speaker) + (1 + L1 | prompt)
## *   Bayes Factor Type: marginal likelihoods (bridgesampling)
# plot
plot(comparison.PC1.FPC1.R.m1.m2.BAAP, n_pies = "one", value = "BF")

# Model comparison using Bayes Factor: Fixed effects
## denominator: the no-interaction model m2
comparison.fixed.effect.PC1.FPC1.R.BAAP <- bayestestR::bayesfactor_models(
  PC1.FPC1.R.m2.BAAP,
  PC1.FPC1.R.m3.BAAP,
  PC1.FPC1.R.m4.BAAP,
  denominator = PC1.FPC1.R.m2.BAAP
)

comparison.fixed.effect.PC1.FPC1.R.BAAP
## Bayes Factors for Model Comparison
## 
##     Model                                              BF
## [2] L1 + (1 | speaker) + (1 + L1 | prompt)       3.79e-22
## [3] vowel + (1 + vowel | speaker) + (1 | prompt) 1.40e-09
## 
## * Against Denominator: [1] vowel + L1 + (1 + vowel | speaker) + (1 + L1 | prompt)
## *   Bayes Factor Type: marginal likelihoods (bridgesampling)
# plot
plot(comparison.fixed.effect.PC1.FPC1.R.BAAP, n_pies = "one", value = "BF")

5.2.4 saving model for future analysis

# save models for future analysis (reloaded below so plots can be rebuilt
# without refitting)
save(PC1.FPC1.R.m1.BAAP, file = file.path("model", "PC1.FPC1.R.m1.BAAP.rda"))
save(PC1.FPC1.R.m2.BAAP, file = file.path("model", "PC1.FPC1.R.m2.BAAP.rda"))
save(PC1.FPC1.R.m3.BAAP, file = file.path("model", "PC1.FPC1.R.m3.BAAP.rda"))
save(PC1.FPC1.R.m4.BAAP, file = file.path("model", "PC1.FPC1.R.m4.BAAP.rda"))

5.2.5 visualising model

# obtain posterior draws (sampled values) from the saved full model
load(file = "model/PC1.FPC1.R.m1.BAAP.rda")

post_data_R_PC1_FPC1_BAAP <- emmeans::emmeans(PC1.FPC1.R.m1.BAAP, ~ L1 * vowel, epred = TRUE) %>% 
  tidybayes::gather_emmeans_draws()

# obtain highest density interval, relabelling vowel / L1 for plotting
post_data_R_PC1_FPC1_BAAP_hdi <- post_data_R_PC1_FPC1_BAAP %>% 
  tidybayes::median_hdi() %>% 
  mutate(
    vowel = case_when(
      vowel == "A" ~ "/a/",
      vowel == "I" ~ "/i/",
      vowel == "U" ~ "/u/"
    ),
    L1 = case_when(
      L1 == "English" ~ "L1 English",
      L1 == "Japanese" ~ "L1 Japanese"
    )
  )

# renaming vowel and L1 labels on the draws themselves (same labels as the
# HDI table so the two layers line up in the plot)
post_data_R_PC1_FPC1_BAAP <- post_data_R_PC1_FPC1_BAAP %>% 
  mutate(
    vowel = case_when(
      vowel == "I" ~ "/i/",
      vowel == "A" ~ "/a/",
      vowel == "U" ~ "/u/"
    ),
    L1 = case_when(
      L1 == "English" ~ "L1 English",
      L1 == "Japanese" ~ "L1 Japanese"
    )
  )

# plotting posterior distribution: medians + 95% HDIs over the raw draws
# fix: `size` on line-based geoms (errorbar, hline) is deprecated since
# ggplot2 3.4 in favour of `linewidth`; point `size` is still correct
post_data_R_PC1_FPC1_BAAP_plot <- ggplot() +
  # posterior medians per vowel
  geom_point(data = post_data_R_PC1_FPC1_BAAP_hdi, aes(x = vowel, y = .value, colour = vowel, shape = vowel), size = 6, stroke = 2, show.legend = FALSE) +
  # 95% HDIs
  geom_errorbar(data = post_data_R_PC1_FPC1_BAAP_hdi, aes(x = vowel, y = .value, ymin = .lower, ymax = .upper), linewidth = 2, width = 0.8, show.legend = FALSE) +
  # individual posterior draws as a low-alpha cloud
  ggbeeswarm::geom_quasirandom(data = post_data_R_PC1_FPC1_BAAP, aes(x = vowel, y = .value, colour = vowel), alpha = 0.008, size = 0.01, width = 0.3, dodge.width = 0.3, show.legend = FALSE) +
  geom_hline(yintercept = 0, linewidth = 0.5, linetype = 2) +
  facet_wrap(~ factor(L1, levels = c("L1 English", "L1 Japanese"))) +
  scale_colour_manual(values = c("blue4", "brown4", "darkolivegreen")) +
  scale_y_continuous(limits = c(-20, 20)) +
  theme_classic() +
  theme(legend.text = element_text(size = 30),
        legend.key.size = unit(2, 'cm'),
        legend.position = "bottom",
        legend.title = element_text(size = 30),
        axis.text.x = element_text(size = 30),
        axis.title.x = element_blank(),
        axis.title.y = element_text(size = 20),
        plot.title = element_text(size = 30, hjust = 0, face = "bold"),
        strip.text.x = element_text(size = 30),
        strip.text.y = element_text(angle = 0, size = 30)
  ) +
  labs(x = "Vowel", y = "FPC1") +
  labs(title = "/Éą/: FPC1 for PC1")

post_data_R_PC1_FPC1_BAAP_plot

5.2.6 model details with probability of direction

# calculate proportion of posterior distribution away from zero
## helper: probability of direction for one L1 group / vowel cell of
## post_data_R_PC1_FPC1_BAAP ("pos" = share of draws > 0, "neg" = share
## of draws < 0); replaces six near-identical
## length(which(...))/length(...) expressions
pd_R_PC1_FPC1_BAAP <- function(l1, v, direction = c("pos", "neg")) {
  direction <- match.arg(direction)
  draws <- post_data_R_PC1_FPC1_BAAP$.value[
    post_data_R_PC1_FPC1_BAAP$L1 == l1 &
      post_data_R_PC1_FPC1_BAAP$vowel == v
  ]
  if (direction == "pos") mean(draws > 0) else mean(draws < 0)
}

EngI_R_PC1_FPC1_BAAP_pd <- pd_R_PC1_FPC1_BAAP("L1 English", "/i/", "pos") # EngI_R: positive
EngA_R_PC1_FPC1_BAAP_pd <- pd_R_PC1_FPC1_BAAP("L1 English", "/a/", "neg") # EngA_R: negative
EngU_R_PC1_FPC1_BAAP_pd <- pd_R_PC1_FPC1_BAAP("L1 English", "/u/", "neg") # EngU_R: negative
JapI_R_PC1_FPC1_BAAP_pd <- pd_R_PC1_FPC1_BAAP("L1 Japanese", "/i/", "pos") # JapI_R: positive
JapA_R_PC1_FPC1_BAAP_pd <- pd_R_PC1_FPC1_BAAP("L1 Japanese", "/a/", "neg") # JapA_R: negative
JapU_R_PC1_FPC1_BAAP_pd <- pd_R_PC1_FPC1_BAAP("L1 Japanese", "/u/", "neg") # JapU_R: negative

# probability data merged together
# NOTE: order must match the row order of post_data_R_PC1_FPC1_BAAP_hdi
# (within each L1: /i/, /a/, /u/)
R_PC1_FPC1_BAAP_pd <- c(
  EngI_R_PC1_FPC1_BAAP_pd, EngA_R_PC1_FPC1_BAAP_pd, EngU_R_PC1_FPC1_BAAP_pd,
  JapI_R_PC1_FPC1_BAAP_pd, JapA_R_PC1_FPC1_BAAP_pd, JapU_R_PC1_FPC1_BAAP_pd
)

R_PC1_FPC1_pd_BAAP_hdi <- data.frame(post_data_R_PC1_FPC1_BAAP_hdi, R_PC1_FPC1_BAAP_pd)

R_PC1_FPC1_pd_BAAP_hdi
##            L1 vowel    .value      .lower     .upper .width .point .interval
## 1  L1 English   /i/  1.240521  -1.9527241  4.3356258   0.95 median       hdi
## 2  L1 English   /a/ -2.008996  -5.1066147  1.0676416   0.95 median       hdi
## 3  L1 English   /u/ -2.421176  -6.0895511  1.2920743   0.95 median       hdi
## 4 L1 Japanese   /i/  3.301085   0.5989382  5.9901645   0.95 median       hdi
## 5 L1 Japanese   /a/ -8.797508 -11.5136751 -6.1293067   0.95 median       hdi
## 6 L1 Japanese   /u/ -3.888072  -7.0499718 -0.6145064   0.95 median       hdi
##   R_PC1_FPC1_BAAP_pd
## 1           0.801025
## 2           0.911975
## 3           0.914425
## 4           0.987000
## 5           0.999600
## 6           0.986250

5.2.7 quantifying vowel context contrast using emmeans

# quantifying contrasts
## get the adjusted means (marginal means per vowel within each L1)
PC1.FPC1.R.m1.BAAP.em <- emmeans::emmeans(PC1.FPC1.R.m1.BAAP, specs = ~ vowel | L1)
print(PC1.FPC1.R.m1.BAAP.em)
## L1 = English:
##  vowel emmean lower.HPD upper.HPD
##  I       1.24    -1.966     4.320
##  A      -2.01    -5.107     1.068
##  U      -2.42    -6.090     1.292
## 
## L1 = Japanese:
##  vowel emmean lower.HPD upper.HPD
##  I       3.30     0.556     5.940
##  A      -8.80   -11.514    -6.129
##  U      -3.89    -7.051    -0.619
## 
## Point estimate displayed: median 
## HPD interval probability: 0.95
## get all possible pairwise vowel contrasts (Tukey set) within each L1
PC1.FPC1.R.m1.BAAP.cont <- emmeans::contrast(PC1.FPC1.R.m1.BAAP.em, method = "tukey")
print(PC1.FPC1.R.m1.BAAP.cont)
## L1 = English:
##  contrast estimate lower.HPD upper.HPD
##  I - A       3.245    -0.696      7.11
##  I - U       3.646    -0.794      7.97
##  A - U       0.411    -4.061      4.78
## 
## L1 = Japanese:
##  contrast estimate lower.HPD upper.HPD
##  I - A      12.088     8.484     15.63
##  I - U       7.184     3.147     11.16
##  A - U      -4.898    -8.793     -0.83
## 
## Point estimate displayed: median 
## HPD interval probability: 0.95
## get the posterior draws from the contrasts and relabel L1 for plotting
PC1_FPC1_R_cont_BAAP_posterior <- PC1.FPC1.R.m1.BAAP.cont %>% 
  tidybayes::gather_emmeans_draws() %>% 
  mutate(
    L1 = case_when(
      L1 == "English" ~ "L1 English",
      L1 == "Japanese" ~ "L1 Japanese"
    )
  )

# calculating probability of direction 
## helper: proportion of posterior contrast draws on the expected side of
## zero for one L1 group / vowel contrast cell of
## PC1_FPC1_R_cont_BAAP_posterior ("pos" = share of draws > 0,
## "neg" = share of draws < 0); replaces six near-identical
## length(which(...))/length(...) expressions
pd_R_cont_PC1_FPC1_BAAP <- function(l1, cont, direction = c("pos", "neg")) {
  direction <- match.arg(direction)
  draws <- PC1_FPC1_R_cont_BAAP_posterior$.value[
    PC1_FPC1_R_cont_BAAP_posterior$L1 == l1 &
      PC1_FPC1_R_cont_BAAP_posterior$contrast == cont
  ]
  if (direction == "pos") mean(draws > 0) else mean(draws < 0)
}

## L1 English
EngIA_R_PC1_FPC1_BAAP_pd <- pd_R_cont_PC1_FPC1_BAAP("L1 English", "I - A", "pos") # EngIA_R: positive
EngIU_R_PC1_FPC1_BAAP_pd <- pd_R_cont_PC1_FPC1_BAAP("L1 English", "I - U", "pos") # EngIU_R: positive
EngAU_R_PC1_FPC1_BAAP_pd <- pd_R_cont_PC1_FPC1_BAAP("L1 English", "A - U", "pos") # EngAU_R: positive

## L1 Japanese
JapIA_R_PC1_FPC1_BAAP_pd <- pd_R_cont_PC1_FPC1_BAAP("L1 Japanese", "I - A", "pos") # JapIA_R: positive
JapIU_R_PC1_FPC1_BAAP_pd <- pd_R_cont_PC1_FPC1_BAAP("L1 Japanese", "I - U", "pos") # JapIU_R: positive
JapAU_R_PC1_FPC1_BAAP_pd <- pd_R_cont_PC1_FPC1_BAAP("L1 Japanese", "A - U", "neg") # JapAU_R: negative

### contrast probability of direction merged together
### NOTE: order must match the row order of the emmeans contrast summary
### printed above (within each L1: I - A, I - U, A - U)
R_PC1_FPC1_contrast_BAAP <- c(
  EngIA_R_PC1_FPC1_BAAP_pd, EngIU_R_PC1_FPC1_BAAP_pd, EngAU_R_PC1_FPC1_BAAP_pd,
  JapIA_R_PC1_FPC1_BAAP_pd, JapIU_R_PC1_FPC1_BAAP_pd, JapAU_R_PC1_FPC1_BAAP_pd
)

### merge contrast pd with hdi/median
R_PC1_FPC1_contrast_BAAP <- data.frame(PC1.FPC1.R.m1.BAAP.cont, R_PC1_FPC1_contrast_BAAP) %>% 
  rename(PD = R_PC1_FPC1_contrast_BAAP)

### results: rounded estimates with HDI bounds and probability of direction
R_PC1_FPC1_contrast_BAAP %>% 
  mutate(
    across(where(is.numeric), \(x) round(x, digits = 2)),
    contrast = factor(contrast, levels = c("A - U", "I - U", "I - A"))
  ) %>% 
  arrange(L1, contrast)
##   contrast       L1 estimate lower.HPD upper.HPD   PD
## 1    A - U  English     0.41     -4.06      4.78 0.59
## 2    I - U  English     3.65     -0.79      7.97 0.96
## 3    I - A  English     3.25     -0.70      7.11 0.96
## 4    A - U Japanese    -4.90     -8.79     -0.83 0.99
## 5    I - U Japanese     7.18      3.15     11.16 1.00
## 6    I - A Japanese    12.09      8.48     15.63 1.00

5.2.8 plotting vowel contrast

## plot: posterior contrast distributions for /Éą/ PC1/FPC1, faceted by L1
## fix: slab_alpha is a constant, so it is set as a stat parameter rather
## than mapped inside aes() — mapping a constant routes it through a scale
## and does not give a literal alpha of 0.4
PC1_FPC1_R_cont_posterior_BAAP_plot <- PC1_FPC1_R_cont_BAAP_posterior %>% 
  ggplot(aes(y = contrast, x = .value)) +
  tidybayes::stat_slab(aes(fill = after_stat(level)), slab_alpha = 0.4, point_interval = "median_hdi", .width = c(.89, .95, .99)) +
  tidybayes::stat_pointinterval(point_interval = "median_hdi", .width = c(.89, .95, .99)) +
  scale_fill_manual(values = c("darkolivegreen", "blue4",  "brown4"), na.translate = FALSE) +
  scale_x_continuous(limits = c(-20, 20)) +
  facet_wrap(~ L1) +
  # reference line at zero difference (spelled-out `linetype`, not `lty`)
  geom_vline(xintercept = 0, linetype = 2) +
  theme_classic() +
  guides(fill = guide_legend(override.aes = list(alpha = 0.4), title = "HDI")) +
  labs(x = "difference", title = "/Éą/: PC1/FPC1 difference") +
  theme(legend.text = element_text(size = 30),
        legend.position = "bottom",
        legend.title = element_text(size = 30),
        axis.text = element_text(size = 15),
        axis.text.y = element_text(size = 25),
        axis.title = element_blank(),
        plot.title = element_text(size = 30, hjust = 0, face = "bold"),
        strip.text.x = element_text(size = 30),
        strip.text.y = element_text(angle = 0, size = 30))

PC1_FPC1_R_cont_posterior_BAAP_plot

5.2.9 slide plot

# stack the posterior plot above the contrast plot for the slide figure
R.BAAP <- ggarrange(
  post_data_R_PC1_FPC1_BAAP_plot,
  PC1_FPC1_R_cont_posterior_BAAP_plot,
  common.legend = FALSE,
  legend = "bottom",
  ncol = 1
)

R.BAAP

## saving plot
ggsave(filename = "figure/PC1_FPC1_R.png", plot = R.BAAP, width = 8, height = 10, dpi = 1000)

6 session info

# record package versions for reproducibility
print(sessionInfo())
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Monterey 12.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/London
## tzcode source: internal
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] emmeans_1.8.9   emuR_2.4.2      ggsci_3.0.0     ggpubr_0.6.0   
##  [5] gridExtra_2.3   scales_1.3.0    brms_2.20.4     Rcpp_1.0.11    
##  [9] lubridate_1.9.3 forcats_1.0.0   stringr_1.5.1   dplyr_1.1.4    
## [13] purrr_1.0.2     readr_2.1.4     tidyr_1.3.0     tibble_3.2.1   
## [17] tidyverse_2.0.0 ggplot2_3.5.0   fdapace_0.5.9  
## 
## loaded via a namespace (and not attached):
##   [1] tensorA_0.36.2.1     rstudioapi_0.15.0    jsonlite_1.8.8      
##   [4] magrittr_2.0.3       ggbeeswarm_0.7.2     estimability_1.4.1  
##   [7] farver_2.1.1         rmarkdown_2.25       ragg_1.2.6          
##  [10] vctrs_0.6.5          base64enc_0.1-3      rstatix_0.7.2       
##  [13] htmltools_0.5.7      distributional_0.4.0 curl_5.1.0          
##  [16] tidybayes_3.0.6      broom_1.0.5          Formula_1.2-5       
##  [19] sass_0.4.8           StanHeaders_2.26.28  pracma_2.4.4        
##  [22] bslib_0.6.1          htmlwidgets_1.6.4    plyr_1.8.9          
##  [25] zoo_1.8-12           cachem_1.0.8         uuid_1.1-1          
##  [28] igraph_1.5.1         mime_0.12            lifecycle_1.0.4     
##  [31] pkgconfig_2.0.3      colourpicker_1.3.0   Matrix_1.6-1.1      
##  [34] R6_2.5.1             fastmap_1.1.1        shiny_1.8.0         
##  [37] digest_0.6.34        numDeriv_2016.8-1.1  colorspace_2.1-0    
##  [40] ps_1.7.5             textshaping_0.3.7    crosstalk_1.2.0     
##  [43] Hmisc_5.1-1          labeling_0.4.3       fansi_1.0.6         
##  [46] timechange_0.2.0     mgcv_1.9-0           abind_1.4-5         
##  [49] compiler_4.3.2       withr_3.0.0          htmlTable_2.4.2     
##  [52] backports_1.4.1      inline_0.3.19        shinystan_2.6.0     
##  [55] carData_3.0-5        DBI_1.1.3            highr_0.10          
##  [58] QuickJSR_1.0.8       pkgbuild_1.4.2       ggsignif_0.6.4      
##  [61] MASS_7.3-60          gtools_3.9.4         loo_2.7.0           
##  [64] tools_4.3.2          vipor_0.4.5          foreign_0.8-85      
##  [67] beeswarm_0.4.0       httpuv_1.6.13        threejs_0.3.3       
##  [70] nnet_7.3-19          glue_1.7.0           callr_3.7.3         
##  [73] nlme_3.1-163         promises_1.2.1       checkmate_2.3.1     
##  [76] cluster_2.1.4        reshape2_1.4.4       see_0.8.1           
##  [79] generics_0.1.3       gtable_0.3.4         tzdb_0.4.0          
##  [82] data.table_1.14.8    hms_1.1.3            car_3.1-2           
##  [85] utf8_1.2.4           ggdist_3.3.0         pillar_1.9.0        
##  [88] markdown_1.11        posterior_1.5.0      later_1.3.2         
##  [91] splines_4.3.2        lattice_0.21-9       tidyselect_1.2.0    
##  [94] miniUI_0.1.1.1       knitr_1.45           arrayhelpers_1.1-0  
##  [97] V8_4.4.1             stats4_4.3.2         xfun_0.41           
## [100] bridgesampling_1.1-2 matrixStats_1.2.0    DT_0.30             
## [103] rstan_2.32.3         stringi_1.8.2        yaml_2.3.7          
## [106] evaluate_0.23        codetools_0.2-19     cli_3.6.2           
## [109] RcppParallel_5.1.7   rpart_4.1.21         systemfonts_1.0.5   
## [112] shinythemes_1.2.0    xtable_1.8-4         munsell_0.5.0       
## [115] processx_3.8.2       jquerylib_0.1.4      coda_0.19-4         
## [118] svUnit_1.0.6         wrassp_1.0.4         parallel_4.3.2      
## [121] rstantools_2.3.1.1   ellipsis_0.3.2       prettyunits_1.2.0   
## [124] bayestestR_0.13.1    dygraphs_1.1.1.6     bayesplot_1.10.0    
## [127] Brobdingnag_1.2-9    mvtnorm_1.2-4        xts_0.13.1          
## [130] insight_0.19.6       crayon_1.5.2         rlang_1.1.3         
## [133] cowplot_1.1.1        shinyjs_2.1.0